[PATCH 0/3] rewrite fastpath routines

2023-10-10 Thread Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and adds support for the 32B Tx descriptor format, which
improves performance.

Shijith Thotton (1):
  net/octeon_ep: support 32B IQ descriptor size

Vamsi Attunuru (2):
  net/octeon_ep: clean up receive routine
  net/octeon_ep: add new fastpath routines

 drivers/net/octeon_ep/cnxk_ep_rx.c| 309 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 209 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|  12 +-
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|  11 +-
 drivers/net/octeon_ep/otx_ep_common.h | 127 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 263 +++---
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 drivers/net/octeon_ep/otx_ep_vf.c |   8 +
 11 files changed, 804 insertions(+), 257 deletions(-)
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c

-- 
2.25.1



[PATCH 1/3] net/octeon_ep: support 32B IQ descriptor size

2023-10-10 Thread Vamsi Attunuru
From: Shijith Thotton 

Update the input queue setup to take the descriptor size from the driver
configuration. The default instruction size for otx2 and cnxk devices has
been updated to 32 bytes.
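
As an illustration (not part of the patch), making the instruction size
configurable means the IQ write pointer can no longer be derived with a
fixed shift; a minimal sketch, assuming the iq->desc_size field the patch
adds (the helper name is hypothetical):

    /* Hypothetical helper: with a fixed 64B format the slot offset was
     * (host_write_index << 6); with a configurable 32B/64B descriptor it
     * must be computed from the per-queue desc_size.
     */
    static inline uint8_t *
    otx_ep_iq_write_ptr(struct otx_ep_instr_queue *iq)
    {
            return (uint8_t *)iq->base_addr +
                   (size_t)iq->host_write_index * iq->desc_size;
    }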

Signed-off-by: Shijith Thotton 
---
 drivers/net/octeon_ep/cnxk_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx2_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx_ep_common.h |  4 
 drivers/net/octeon_ep/otx_ep_rxtx.c   |  8 +++-
 drivers/net/octeon_ep/otx_ep_vf.c |  8 
 5 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+   else
+   reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
	oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_BADDR(iq_no));
	oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+   else
+   reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
	oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_BADDR(iq_no));
	oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
 
 #define OTX_EP_MAX_RINGS_PER_VF(8)
 #define OTX_EP_CFG_IO_QUEUESOTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
 #define OTX_EP_64BYTE_INSTR (64)
 /*
  * Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of  descriptors in this ring. */
uint32_t nb_desc;
 
+   /* Size of the descriptor. */
+   uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
 
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..5b759d759b 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -484,7 +484,7 @@ otx_ep_ring_doorbell(struct otx_ep_device *otx_ep __rte_unused,
 static inline int
 post_iqcmd(struct otx_ep_instr_queue *iq, uint8_t *iqcmd)
 {
-   uint8_t *iqptr, cmdsize;
+   uint8_t *iqptr;
 
/* This ensures that the read index does not wrap around to
 * the same position if queue gets full before OCTEON 9 could
@@ -494,10 +494,8 @@ post_iqcmd(struct otx_ep_instr_queue *iq, uint8_t *iqcmd)
return OTX_EP_IQ_SEND_FAILED;
 
/* Copy cmd into iq */
-   cmdsize = 64;
-   iqptr   = iq->base_addr + (iq->host_write_index << 6

[PATCH 2/3] net/octeon_ep: clean up receive routine

2023-10-10 Thread Vamsi Attunuru
This patch improves the Rx routine and the packet count update routines;
the packet count update routines need to drain in-flight ISM memory
updates while decrementing the packet count register.
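
In outline, the drain pattern the routines below adopt (a sketch assembled
from the diff itself): after subtracting 'val' from the HW count register,
re-request an ISM update and poll the ISM shadow word until the
subtraction becomes visible, so a stale in-flight ISM write cannot be read
back as a fresh packet count:

    rte_write32(val, iq->inst_cnt_reg);
    rte_mb();
    rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
    while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
            /* Keep requesting until the ISM word reflects the new count. */
            rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
            rte_mb();
    }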

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/otx_ep_rxtx.c | 162 
 1 file changed, 68 insertions(+), 94 deletions(-)

diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c b/drivers/net/octeon_ep/otx_ep_rxtx.c
index 5b759d759b..ea7c9a5d62 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,14 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
 * when count above halfway to saturation.
 */
rte_write32(val, iq->inst_cnt_reg);
-   *iq->inst_cnt_ism = 0;
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   rte_mb();
+   }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -565,9 +572,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, struct rte_mbuf *m, uint
 
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
-   ih->s.tlen = pkt_len + ih->s.fsz;
-   ih->s.gsz = frags;
-   ih->s.gather = 1;
+   ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + ih->s.fsz));
 
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -750,36 +755,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
 static uint32_t
 otx_ep_droq_refill(struct otx_ep_droq *droq)
 {
-   struct otx_ep_droq_desc *desc_ring;
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
 
-   desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
-   /* If a valid buffer exists (happens if there is no dispatch),
-* reuse the buffer, else allocate.
-*/
-   if (droq->recv_buf_list[droq->refill_idx] != NULL)
-   break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
 * continuing
 */
-   if (buf == NULL) {
+   if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
-   memset(info, 0, sizeof(*info));
+   info->length = 0;
 
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
 
@@ -791,21 +786,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
 }
 
 static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
-   struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq *droq, int next_fetch)
 {
volatile struct otx_ep_droq_info *info;
-   struct rte_mbuf *droq_pkt2 = NULL;
-   struct rte_mbuf *droq_pkt = NULL;
-   struct rte_net_hdr_lens hdr_lens;
-   struct otx_ep_droq_info *info2;
+   struct rte_mbuf *mbuf_next = NULL;
+   struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
 
-   droq_pkt  = droq->recv_buf_list[droq->read_idx];
-   droq_pkt2  = droq->recv_buf_list[droq->read_idx];
-   info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+   mbuf = droq->recv_buf_list[droq->read_idx];
+   info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -826,32 +818,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
-   droq_pkt2  = droq->recv_buf_list[next_idx];
-   info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
-   rte_prefetch_non_temporal((const 

[PATCH 3/3] net/octeon_ep: add new fastpath routines

2023-10-10 Thread Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint devices and
assigns them based on the offload flags.

The patch also adds miscellaneous changes to improve performance and
code readability.
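
The burst-function assignment itself falls outside the truncated diff
below; conceptually it follows the usual DPDK dispatch pattern (a hedged
sketch only -- the handler names here are illustrative, not the patch's):

    /* Illustrative only: select the Rx burst handler from offload flags. */
    if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
            eth_dev->rx_pkt_burst = cnxk_ep_recv_pkts_mseg; /* hypothetical */
    else
            eth_dev->rx_pkt_burst = cnxk_ep_recv_pkts;      /* hypothetical */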

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/cnxk_ep_rx.c| 309 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 209 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|   2 +
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|   1 +
 drivers/net/octeon_ep/otx_ep_common.h | 125 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   |  93 +---
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 10 files changed, 704 insertions(+), 157 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 00..74f0011283
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+   struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+   uint32_t refill_idx = droq->refill_idx;
+   struct rte_mbuf *buf;
+   uint32_t i;
+   int rc;
+
+   rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return rc;
+   }
+
+   for (i = 0; i < count; i++) {
+   buf = recv_buf_list[refill_idx];
+   desc_ring[refill_idx].buffer_ptr = rte_mbuf_data_iova_default(buf);
+   refill_idx++;
+   }
+
+   droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, droq->nb_desc);
+   droq->refill_count -= count;
+
+   return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+   uint32_t desc_refilled = 0, count;
+   uint32_t nb_desc = droq->nb_desc;
+   uint32_t refill_idx = droq->refill_idx;
+   int rc;
+
+   if (unlikely(droq->read_idx == refill_idx))
+   return;
+
+   if (refill_idx < droq->read_idx) {
+   count = droq->read_idx - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled = count;
+   } else {
+   count = nb_desc - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+
+   desc_refilled = count;
+   count = droq->read_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled += count;
+   }
+
+   /* Flush the droq descriptor data to memory to be sure
+* that when we update the credits the data in memory is
+* accurate.
+*/
+   rte_io_wmb();
+   rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+   uint32_t new_pkts;
+   uint32_t val;
+
+   /* Batch subtractions from the HW counter to reduce PCIe traffic
+* This adds an extra local variable, but almost halves the
+* number of PCIe writes.
+*/
+   val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
+   new_pkts = val - droq->pkts_sent_ism_prev;
+   droq->pkts_sent_ism_prev = val;
+
+   if (val > (uint32_t)(1 << 31)) {
+   /* Only subtract the packet count in the HW counter
+* when count above halfway to saturation.
+*/
+   rte_write64((uint64_t)val, droq->pkts_sent_reg);
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   rte_mb();
+   }
+
+   droq->pkts_sent_ism_prev = 0;
+   }
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   droq->pkts_pending += new_pkts;
+
+   return new_pkts;
+}
+
+static inline int16_t __rte_hot
+cnxk_ep_rx

[PATCH v2 0/3] rewrite fastpath routines

2023-10-11 Thread Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and adds support for the 32B Tx descriptor format, which
improves performance.

v2 change:
- Fixed CI

Shijith Thotton (1):
  net/octeon_ep: support 32B IQ descriptor size

Vamsi Attunuru (2):
  net/octeon_ep: clean up receive routine
  net/octeon_ep: add new fastpath routines

 drivers/net/octeon_ep/cnxk_ep_rx.c| 309 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 209 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|  12 +-
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |  12 +
 drivers/net/octeon_ep/otx2_ep_vf.c|  11 +-
 drivers/net/octeon_ep/otx_ep_common.h | 127 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 263 +++---
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 drivers/net/octeon_ep/otx_ep_vf.c |   8 +
 11 files changed, 814 insertions(+), 257 deletions(-)
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c

-- 
2.25.1



[PATCH v2 1/3] net/octeon_ep: support 32B IQ descriptor size

2023-10-11 Thread Vamsi Attunuru
From: Shijith Thotton 

Update the input queue setup to take the descriptor size from the driver
configuration. The default instruction size for otx2 and cnxk devices has
been updated to 32 bytes.

Signed-off-by: Shijith Thotton 
---
 drivers/net/octeon_ep/cnxk_ep_vf.c| 10 +-
 drivers/net/octeon_ep/meson.build | 10 ++
 drivers/net/octeon_ep/otx2_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx_ep_common.h |  4 
 drivers/net/octeon_ep/otx_ep_rxtx.c   |  8 +++-
 drivers/net/octeon_ep/otx_ep_vf.c |  8 
 6 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+   else
+   reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + 
CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + 
CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/meson.build b/drivers/net/octeon_ep/meson.build
index e698bf9792..4538c0396e 100644
--- a/drivers/net/octeon_ep/meson.build
+++ b/drivers/net/octeon_ep/meson.build
@@ -10,3 +10,13 @@ sources = files(
 'cnxk_ep_vf.c',
 'otx_ep_mbox.c',
 )
+
+if (toolchain == 'gcc' and cc.version().version_compare('>=11.0.0'))
+error_cflags += ['-Wno-array-bounds']
+endif
+
+foreach flag: error_cflags
+if cc.has_argument(flag)
+c_args += flag
+endif
+endforeach
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c 
b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+   else
+   reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + 
SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + 
SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
 
 #define OTX_EP_MAX_RINGS_PER_VF(8)
 #define OTX_EP_CFG_IO_QUEUESOTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
 #define OTX_EP_64BYTE_INSTR (64)
 /*
  * Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of  descriptors in this ring. */
uint32_t nb_desc;
 
+   /* Size of the descriptor. */
+   uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
 
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c 
b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..5b759d759b 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -484,7 +484,7 @@ otx_ep_ri

[PATCH v2 2/3] net/octeon_ep: clean up receive routine

2023-10-11 Thread Vamsi Attunuru
This patch improves the Rx routine and the packet count update routines;
the packet count update routines need to drain in-flight ISM memory
updates while decrementing the packet count register.
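
One micro-optimization in this patch folds three bitfield stores into a
single 64-bit OR; reading the removed lines in the diff, bit 62 carries
ih->s.gather, bits 48 and up carry ih->s.gsz, and the low bits carry
ih->s.tlen (field positions inferred from the diff, not from the header
definition):

    /* Equivalent to the removed stores:
     *   ih->s.gather = 1;                    -> bit 62
     *   ih->s.gsz    = frags;                -> bits 48..
     *   ih->s.tlen   = pkt_len + ih->s.fsz;  -> low bits
     */
    ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + ih->s.fsz));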

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/otx_ep_rxtx.c | 162 
 1 file changed, 68 insertions(+), 94 deletions(-)

diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c 
b/drivers/net/octeon_ep/otx_ep_rxtx.c
index 5b759d759b..ea7c9a5d62 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,14 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
 * when count above halfway to saturation.
 */
rte_write32(val, iq->inst_cnt_reg);
-   *iq->inst_cnt_ism = 0;
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= 
val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   rte_mb();
+   }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -565,9 +572,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, 
struct rte_mbuf *m, uint
 
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
-   ih->s.tlen = pkt_len + ih->s.fsz;
-   ih->s.gsz = frags;
-   ih->s.gather = 1;
+   ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + 
ih->s.fsz));
 
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -750,36 +755,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, 
uint16_t nb_pkts)
 static uint32_t
 otx_ep_droq_refill(struct otx_ep_droq *droq)
 {
-   struct otx_ep_droq_desc *desc_ring;
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
 
-   desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
-   /* If a valid buffer exists (happens if there is no dispatch),
-* reuse the buffer, else allocate.
-*/
-   if (droq->recv_buf_list[droq->refill_idx] != NULL)
-   break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
 * continuing
 */
-   if (buf == NULL) {
+   if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
-   memset(info, 0, sizeof(*info));
+   info->length = 0;
 
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
 
@@ -791,21 +786,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
 }
 
 static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
-   struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq 
*droq, int next_fetch)
 {
volatile struct otx_ep_droq_info *info;
-   struct rte_mbuf *droq_pkt2 = NULL;
-   struct rte_mbuf *droq_pkt = NULL;
-   struct rte_net_hdr_lens hdr_lens;
-   struct otx_ep_droq_info *info2;
+   struct rte_mbuf *mbuf_next = NULL;
+   struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
 
-   droq_pkt  = droq->recv_buf_list[droq->read_idx];
-   droq_pkt2  = droq->recv_buf_list[droq->read_idx];
-   info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+   mbuf = droq->recv_buf_list[droq->read_idx];
+   info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -826,32 +818,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
-   droq_pkt2  = droq->recv_buf_list[next_idx];
-   info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
-   rte_prefetch_non_temporal((const 

[PATCH v2 3/3] net/octeon_ep: add new fastpath routines

2023-10-11 Thread Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint devices and
assigns them based on the offload flags.

The patch also adds miscellaneous changes to improve performance and
code readability.
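
The new Rx refill in cnxk_ep_rx.c (shown below) replaces per-buffer
allocation with bulk allocation; condensed, its wraparound handling looks
like this (error handling elided, helpers as in the diff):

    /* Refill [refill_idx .. read_idx) with one bulk allocation when the
     * range is contiguous, otherwise as a tail segment then a head segment.
     */
    if (refill_idx < droq->read_idx) {
            cnxk_ep_rx_refill_mbuf(droq, droq->read_idx - refill_idx);
    } else {
            cnxk_ep_rx_refill_mbuf(droq, nb_desc - refill_idx); /* tail */
            cnxk_ep_rx_refill_mbuf(droq, droq->read_idx);       /* head */
    }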

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/cnxk_ep_rx.c| 309 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 209 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|   2 +
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|   1 +
 drivers/net/octeon_ep/otx_ep_common.h | 125 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   |  93 +---
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 10 files changed, 704 insertions(+), 157 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c 
b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 00..74f0011283
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+   struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+   uint32_t refill_idx = droq->refill_idx;
+   struct rte_mbuf *buf;
+   uint32_t i;
+   int rc;
+
+   rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], 
count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return rc;
+   }
+
+   for (i = 0; i < count; i++) {
+   buf = recv_buf_list[refill_idx];
+   desc_ring[refill_idx].buffer_ptr = 
rte_mbuf_data_iova_default(buf);
+   refill_idx++;
+   }
+
+   droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, 
droq->nb_desc);
+   droq->refill_count -= count;
+
+   return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+   uint32_t desc_refilled = 0, count;
+   uint32_t nb_desc = droq->nb_desc;
+   uint32_t refill_idx = droq->refill_idx;
+   int rc;
+
+   if (unlikely(droq->read_idx == refill_idx))
+   return;
+
+   if (refill_idx < droq->read_idx) {
+   count = droq->read_idx - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled = count;
+   } else {
+   count = nb_desc - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+
+   desc_refilled = count;
+   count = droq->read_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled += count;
+   }
+
+   /* Flush the droq descriptor data to memory to be sure
+* that when we update the credits the data in memory is
+* accurate.
+*/
+   rte_io_wmb();
+   rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+   uint32_t new_pkts;
+   uint32_t val;
+
+   /* Batch subtractions from the HW counter to reduce PCIe traffic
+* This adds an extra local variable, but almost halves the
+* number of PCIe writes.
+*/
+   val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
+   new_pkts = val - droq->pkts_sent_ism_prev;
+   droq->pkts_sent_ism_prev = val;
+
+   if (val > (uint32_t)(1 << 31)) {
+   /* Only subtract the packet count in the HW counter
+* when count above halfway to saturation.
+*/
+   rte_write64((uint64_t)val, droq->pkts_sent_reg);
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) 
>= val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   rte_mb();
+   }
+
+   droq->pkts_sent_ism_prev = 0;
+   }
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   droq->pkts_pending += new_pkts;
+
+   return new_pkts;
+}
+
+static inline int16_t __rte_hot
+cnxk_ep_rx

[PATCH v3 0/3] rewrite fastpath routines

2023-10-11 Thread Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and adds support for the 32B Tx descriptor format, which
improves performance.

v2 & v3 changes:
- Fixed CI

Shijith Thotton (1):
  net/octeon_ep: support 32B IQ descriptor size

Vamsi Attunuru (2):
  net/octeon_ep: clean up receive routine
  net/octeon_ep: add new fastpath routines

 drivers/net/octeon_ep/cnxk_ep_rx.c| 309 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 209 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|  12 +-
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|  11 +-
 drivers/net/octeon_ep/otx_ep_common.h | 127 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 255 +++--
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 drivers/net/octeon_ep/otx_ep_vf.c |   8 +
 11 files changed, 801 insertions(+), 252 deletions(-)
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c

-- 
2.25.1



[PATCH v3 1/3] net/octeon_ep: support 32B IQ descriptor size

2023-10-11 Thread Vamsi Attunuru
From: Shijith Thotton 

Update the input queue setup to take the descriptor size from the driver
configuration. The default instruction size for otx2 and cnxk devices has
been updated to 32 bytes.

Signed-off-by: Shijith Thotton 
---
 drivers/net/octeon_ep/cnxk_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx2_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx_ep_common.h |  4 
 drivers/net/octeon_ep/otx_ep_vf.c |  8 
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+   else
+   reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + 
CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + 
CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c 
b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+   else
+   reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + 
SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + 
SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
 
 #define OTX_EP_MAX_RINGS_PER_VF(8)
 #define OTX_EP_CFG_IO_QUEUESOTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
 #define OTX_EP_64BYTE_INSTR (64)
 /*
  * Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of  descriptors in this ring. */
uint32_t nb_desc;
 
+   /* Size of the descriptor. */
+   uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
 
diff --git a/drivers/net/octeon_ep/otx_ep_vf.c b/drivers/net/octeon_ep/otx_ep_vf.c
index 4f3538146b..236b7a874c 100644
--- a/drivers/net/octeon_ep/otx_ep_vf.c
+++ b/drivers/net/octeon_ep/otx_ep_vf.c
@@ -120,6 +120,14 @@ otx_ep_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (iq->desc_size == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(OTX_EP_R_IN_CTL_IS_64B);
+   else
+   reg_val |= OTX_EP_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + OTX_EP_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
otx_ep_write64(iq->base_addr_dma, otx_ep->hw_addr,
   OTX_EP_R_IN_INSTR_

[PATCH v3 2/3] net/octeon_ep: clean up receive routine

2023-10-11 Thread Vamsi Attunuru
This patch improves the Rx routine and the packet count update routines;
the packet count update routines need to drain in-flight ISM memory
updates while decrementing the packet count register.
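
A small but deliberate change in the refill path below: instead of
memset()ing the whole otx_ep_droq_info block for every buffer, only the
length word is cleared, because the Rx path gates on info->length being
non-zero (see the rte_rmb()/!info->length check further down):

    info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
    info->length = 0;  /* sufficient: Rx treats length != 0 as "data ready" */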

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/otx_ep_rxtx.c | 162 
 1 file changed, 68 insertions(+), 94 deletions(-)

diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c 
b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..4c509a419f 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,14 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
 * when count above halfway to saturation.
 */
rte_write32(val, iq->inst_cnt_reg);
-   *iq->inst_cnt_ism = 0;
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= 
val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   rte_mb();
+   }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -567,9 +574,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, 
struct rte_mbuf *m, uint
 
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
-   ih->s.tlen = pkt_len + ih->s.fsz;
-   ih->s.gsz = frags;
-   ih->s.gather = 1;
+   ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + 
ih->s.fsz));
 
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -752,36 +757,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, 
uint16_t nb_pkts)
 static uint32_t
 otx_ep_droq_refill(struct otx_ep_droq *droq)
 {
-   struct otx_ep_droq_desc *desc_ring;
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
 
-   desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
-   /* If a valid buffer exists (happens if there is no dispatch),
-* reuse the buffer, else allocate.
-*/
-   if (droq->recv_buf_list[droq->refill_idx] != NULL)
-   break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
 * continuing
 */
-   if (buf == NULL) {
+   if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
-   memset(info, 0, sizeof(*info));
+   info->length = 0;
 
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
 
@@ -793,21 +788,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
 }
 
 static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
-   struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq 
*droq, int next_fetch)
 {
volatile struct otx_ep_droq_info *info;
-   struct rte_mbuf *droq_pkt2 = NULL;
-   struct rte_mbuf *droq_pkt = NULL;
-   struct rte_net_hdr_lens hdr_lens;
-   struct otx_ep_droq_info *info2;
+   struct rte_mbuf *mbuf_next = NULL;
+   struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
 
-   droq_pkt  = droq->recv_buf_list[droq->read_idx];
-   droq_pkt2  = droq->recv_buf_list[droq->read_idx];
-   info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+   mbuf = droq->recv_buf_list[droq->read_idx];
+   info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -828,32 +820,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
-   droq_pkt2  = droq->recv_buf_list[next_idx];
-   info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
-   rte_prefetch_non_temporal((const 

[PATCH v3 3/3] net/octeon_ep: add new fastpath routines

2023-10-11 Thread Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint devices and
assigns them based on the offload flags.

The patch also adds miscellaneous changes to improve performance and
code readability.

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/cnxk_ep_rx.c| 309 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 209 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|   2 +
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|   1 +
 drivers/net/octeon_ep/otx_ep_common.h | 125 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   |  93 +---
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 10 files changed, 704 insertions(+), 157 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c 
b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 00..74f0011283
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+   struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+   uint32_t refill_idx = droq->refill_idx;
+   struct rte_mbuf *buf;
+   uint32_t i;
+   int rc;
+
+   rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], 
count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return rc;
+   }
+
+   for (i = 0; i < count; i++) {
+   buf = recv_buf_list[refill_idx];
+   desc_ring[refill_idx].buffer_ptr = 
rte_mbuf_data_iova_default(buf);
+   refill_idx++;
+   }
+
+   droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, 
droq->nb_desc);
+   droq->refill_count -= count;
+
+   return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+   uint32_t desc_refilled = 0, count;
+   uint32_t nb_desc = droq->nb_desc;
+   uint32_t refill_idx = droq->refill_idx;
+   int rc;
+
+   if (unlikely(droq->read_idx == refill_idx))
+   return;
+
+   if (refill_idx < droq->read_idx) {
+   count = droq->read_idx - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled = count;
+   } else {
+   count = nb_desc - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+
+   desc_refilled = count;
+   count = droq->read_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled += count;
+   }
+
+   /* Flush the droq descriptor data to memory to be sure
+* that when we update the credits the data in memory is
+* accurate.
+*/
+   rte_io_wmb();
+   rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+   uint32_t new_pkts;
+   uint32_t val;
+
+   /* Batch subtractions from the HW counter to reduce PCIe traffic
+* This adds an extra local variable, but almost halves the
+* number of PCIe writes.
+*/
+   val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
+   new_pkts = val - droq->pkts_sent_ism_prev;
+   droq->pkts_sent_ism_prev = val;
+
+   if (val > (uint32_t)(1 << 31)) {
+   /* Only subtract the packet count in the HW counter
+* when count above halfway to saturation.
+*/
+   rte_write64((uint64_t)val, droq->pkts_sent_reg);
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) 
>= val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   rte_mb();
+   }
+
+   droq->pkts_sent_ism_prev = 0;
+   }
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   droq->pkts_pending += new_pkts;
+
+   return new_pkts;
+}
+
+static inline int16_t __rte_hot
+cnxk_ep_rx

[PATCH v4 0/3] rewrite fastpath routines

2023-10-11 Thread Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and adds support for the 32B Tx descriptor format, which
improves performance.

v4 changes:
- Use rte_atomic_xxx instead of __atomic_xxx built-ins (a short
  before/after sketch follows this changelog)

v2 & v3 changes:
- Fixed CI
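
As an illustration of the v4 atomics change, the ISM load in the counter
drain loop changes as follows (both forms appear verbatim in the v3 and v4
diffs):

    /* before (GCC built-in), as in v3: */
    val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
    /* after (DPDK stdatomic wrappers), as in v4: */
    val = rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed);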

Shijith Thotton (1):
  net/octeon_ep: support 32B IQ descriptor size

Vamsi Attunuru (2):
  net/octeon_ep: clean up receive routine
  net/octeon_ep: add new fastpath routines

 drivers/net/octeon_ep/cnxk_ep_rx.c| 310 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 210 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|  12 +-
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|  11 +-
 drivers/net/octeon_ep/otx_ep_common.h | 127 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 257 +++--
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 drivers/net/octeon_ep/otx_ep_vf.c |   8 +
 11 files changed, 805 insertions(+), 252 deletions(-)
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c

-- 
2.25.1



[PATCH v4 1/3] net/octeon_ep: support 32B IQ descriptor size

2023-10-11 Thread Vamsi Attunuru
From: Shijith Thotton 

Update the input queue setup to take the descriptor size from the driver
configuration. The default instruction size for otx2 and cnxk devices has
been updated to 32 bytes.

Signed-off-by: Shijith Thotton 
---
 drivers/net/octeon_ep/cnxk_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx2_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx_ep_common.h |  4 
 drivers/net/octeon_ep/otx_ep_vf.c |  8 
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+   else
+   reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + 
CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + 
CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c 
b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+   else
+   reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + 
SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + 
SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
 
 #define OTX_EP_MAX_RINGS_PER_VF(8)
 #define OTX_EP_CFG_IO_QUEUESOTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
 #define OTX_EP_64BYTE_INSTR (64)
 /*
  * Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of  descriptors in this ring. */
uint32_t nb_desc;
 
+   /* Size of the descriptor. */
+   uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
 
diff --git a/drivers/net/octeon_ep/otx_ep_vf.c 
b/drivers/net/octeon_ep/otx_ep_vf.c
index 4f3538146b..236b7a874c 100644
--- a/drivers/net/octeon_ep/otx_ep_vf.c
+++ b/drivers/net/octeon_ep/otx_ep_vf.c
@@ -120,6 +120,14 @@ otx_ep_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (iq->desc_size == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(OTX_EP_R_IN_CTL_IS_64B);
+   else
+   reg_val |= OTX_EP_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + OTX_EP_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
otx_ep_write64(iq->base_addr_dma, otx_ep->hw_addr,
   OTX_EP_R_IN_INSTR_

[PATCH v4 2/3] net/octeon_ep: clean up receive routine

2023-10-11 Thread Vamsi Attunuru
This patch improves the Rx routine and the packet count update routines;
the packet count update routines need to drain in-flight ISM memory
updates while decrementing the packet count register.

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/otx_ep_rxtx.c | 164 
 1 file changed, 70 insertions(+), 94 deletions(-)

diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c 
b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..2654e13e18 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,15 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
 * when count above halfway to saturation.
 */
rte_write32(val, iq->inst_cnt_reg);
-   *iq->inst_cnt_ism = 0;
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   while (rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed) >= val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   rte_mb();
+   }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -567,9 +575,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, 
struct rte_mbuf *m, uint
 
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
-   ih->s.tlen = pkt_len + ih->s.fsz;
-   ih->s.gsz = frags;
-   ih->s.gather = 1;
+   ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + 
ih->s.fsz));
 
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -752,36 +758,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, 
uint16_t nb_pkts)
 static uint32_t
 otx_ep_droq_refill(struct otx_ep_droq *droq)
 {
-   struct otx_ep_droq_desc *desc_ring;
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
 
-   desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
-   /* If a valid buffer exists (happens if there is no dispatch),
-* reuse the buffer, else allocate.
-*/
-   if (droq->recv_buf_list[droq->refill_idx] != NULL)
-   break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
 * continuing
 */
-   if (buf == NULL) {
+   if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
-   memset(info, 0, sizeof(*info));
+   info->length = 0;
 
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
 
@@ -793,21 +789,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
 }
 
 static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
-   struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq 
*droq, int next_fetch)
 {
volatile struct otx_ep_droq_info *info;
-   struct rte_mbuf *droq_pkt2 = NULL;
-   struct rte_mbuf *droq_pkt = NULL;
-   struct rte_net_hdr_lens hdr_lens;
-   struct otx_ep_droq_info *info2;
+   struct rte_mbuf *mbuf_next = NULL;
+   struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
 
-   droq_pkt  = droq->recv_buf_list[droq->read_idx];
-   droq_pkt2  = droq->recv_buf_list[droq->read_idx];
-   info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+   mbuf = droq->recv_buf_list[droq->read_idx];
+   info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -828,32 +821,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
-   droq_pkt2  = droq->recv_buf_list[next_idx];
-   info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
-  

[PATCH v4 3/3] net/octeon_ep: add new fastpath routines

2023-10-11 Thread Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint devices and
assigns them based on the offload flags.

The patch also adds miscellaneous changes to improve performance and
code readability.
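
One ordering detail worth noting in the Rx refill below: the descriptor
ring writes are flushed before credits are handed back, so the hardware
never sees credits for descriptors whose buffer pointers may still be in
flight:

    rte_io_wmb();  /* make buffer_ptr updates visible before the doorbell */
    rte_write32(desc_refilled, droq->pkts_credit_reg);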

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/cnxk_ep_rx.c| 310 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 210 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|   2 +
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|   1 +
 drivers/net/octeon_ep/otx_ep_common.h | 125 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   |  93 +---
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 10 files changed, 706 insertions(+), 157 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c 
b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 00..22bf3ce7a7
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,310 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+   struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+   uint32_t refill_idx = droq->refill_idx;
+   struct rte_mbuf *buf;
+   uint32_t i;
+   int rc;
+
+   rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], 
count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return rc;
+   }
+
+   for (i = 0; i < count; i++) {
+   buf = recv_buf_list[refill_idx];
+   desc_ring[refill_idx].buffer_ptr = 
rte_mbuf_data_iova_default(buf);
+   refill_idx++;
+   }
+
+   droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, 
droq->nb_desc);
+   droq->refill_count -= count;
+
+   return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+   uint32_t desc_refilled = 0, count;
+   uint32_t nb_desc = droq->nb_desc;
+   uint32_t refill_idx = droq->refill_idx;
+   int rc;
+
+   if (unlikely(droq->read_idx == refill_idx))
+   return;
+
+   if (refill_idx < droq->read_idx) {
+   count = droq->read_idx - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled = count;
+   } else {
+   count = nb_desc - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+
+   desc_refilled = count;
+   count = droq->read_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled += count;
+   }
+
+   /* Flush the droq descriptor data to memory to be sure
+* that when we update the credits the data in memory is
+* accurate.
+*/
+   rte_io_wmb();
+   rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+   uint32_t new_pkts;
+   uint32_t val;
+
+   /* Batch subtractions from the HW counter to reduce PCIe traffic
+* This adds an extra local variable, but almost halves the
+* number of PCIe writes.
+*/
+   val = rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed);
+   new_pkts = val - droq->pkts_sent_ism_prev;
+   droq->pkts_sent_ism_prev = val;
+
+   if (val > (uint32_t)(1 << 31)) {
+   /* Only subtract the packet count in the HW counter
+* when count above halfway to saturation.
+*/
+   rte_write64((uint64_t)val, droq->pkts_sent_reg);
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   while (rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed) >= val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   rte_mb();
+   }
+
+   droq->pkts_sent_ism_prev = 0;
+   }
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   droq->pkts_pending += new_pkts;
+
+   ret

[PATCH v5 0/3] rewrite fastpath routines

2023-10-18 Thread Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and adds support for the 32B Tx descriptor format, which
improves performance.

v5 changes:
- Series rebased

v4 changes:
- Use rte_atomic_xxx instead of __atomic_xxx built-ins

v2 & v3 changes:
- Fixed CI

Shijith Thotton (1):
  net/octeon_ep: support 32B IQ descriptor size

Vamsi Attunuru (2):
  net/octeon_ep: clean up receive routine
  net/octeon_ep: add new fastpath routines

 drivers/net/octeon_ep/cnxk_ep_rx.c| 310 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 210 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|  12 +-
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|  11 +-
 drivers/net/octeon_ep/otx_ep_common.h | 127 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 257 +++--
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 drivers/net/octeon_ep/otx_ep_vf.c |   8 +
 11 files changed, 805 insertions(+), 252 deletions(-)
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c

-- 
2.25.1



[PATCH v5 1/3] net/octeon_ep: support 32B IQ descriptor size

2023-10-18 Thread Vamsi Attunuru
From: Shijith Thotton 

Update the input queue setup to take the descriptor size from the driver
configuration. The default instruction size for otx2 and cnxk devices has
been updated to 32 bytes.

Signed-off-by: Shijith Thotton 
---
 drivers/net/octeon_ep/cnxk_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx2_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx_ep_common.h |  4 
 drivers/net/octeon_ep/otx_ep_vf.c |  8 
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+   else
+   reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + 
CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + 
CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c 
b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+   else
+   reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + 
SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + 
SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
 
 #define OTX_EP_MAX_RINGS_PER_VF(8)
 #define OTX_EP_CFG_IO_QUEUESOTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
 #define OTX_EP_64BYTE_INSTR (64)
 /*
  * Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of  descriptors in this ring. */
uint32_t nb_desc;
 
+   /* Size of the descriptor. */
+   uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
 
diff --git a/drivers/net/octeon_ep/otx_ep_vf.c 
b/drivers/net/octeon_ep/otx_ep_vf.c
index 4f3538146b..236b7a874c 100644
--- a/drivers/net/octeon_ep/otx_ep_vf.c
+++ b/drivers/net/octeon_ep/otx_ep_vf.c
@@ -120,6 +120,14 @@ otx_ep_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (iq->desc_size == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(OTX_EP_R_IN_CTL_IS_64B);
+   else
+   reg_val |= OTX_EP_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + OTX_EP_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
otx_ep_write64(iq->base_addr_dma, otx_ep->hw_addr,
   OTX_EP_R_IN_INSTR_BADDR(iq_no));

[PATCH v5 2/3] net/octeon_ep: clean up receive routine

2023-10-18 Thread Vamsi Attunuru
Patch improves the Rx routine and the packet count update
routines; the packet count update routines need to drain
in-flight ISM memory updates while decrementing the packet
count register.

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/otx_ep_rxtx.c | 164 
 1 file changed, 70 insertions(+), 94 deletions(-)

diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c 
b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..2654e13e18 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,15 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
 * when count above halfway to saturation.
 */
rte_write32(val, iq->inst_cnt_reg);
-   *iq->inst_cnt_ism = 0;
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   while (rte_atomic_load_explicit(iq->inst_cnt_ism, 
rte_memory_order_relaxed) >=
+  val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   rte_mb();
+   }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -567,9 +575,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, 
struct rte_mbuf *m, uint
 
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
-   ih->s.tlen = pkt_len + ih->s.fsz;
-   ih->s.gsz = frags;
-   ih->s.gather = 1;
+   ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + 
ih->s.fsz));
 
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -752,36 +758,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, 
uint16_t nb_pkts)
 static uint32_t
 otx_ep_droq_refill(struct otx_ep_droq *droq)
 {
-   struct otx_ep_droq_desc *desc_ring;
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
 
-   desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
-   /* If a valid buffer exists (happens if there is no dispatch),
-* reuse the buffer, else allocate.
-*/
-   if (droq->recv_buf_list[droq->refill_idx] != NULL)
-   break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
 * continuing
 */
-   if (buf == NULL) {
+   if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
-   memset(info, 0, sizeof(*info));
+   info->length = 0;
 
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
 
@@ -793,21 +789,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
 }
 
 static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
-   struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq 
*droq, int next_fetch)
 {
volatile struct otx_ep_droq_info *info;
-   struct rte_mbuf *droq_pkt2 = NULL;
-   struct rte_mbuf *droq_pkt = NULL;
-   struct rte_net_hdr_lens hdr_lens;
-   struct otx_ep_droq_info *info2;
+   struct rte_mbuf *mbuf_next = NULL;
+   struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
 
-   droq_pkt  = droq->recv_buf_list[droq->read_idx];
-   droq_pkt2  = droq->recv_buf_list[droq->read_idx];
-   info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+   mbuf = droq->recv_buf_list[droq->read_idx];
+   info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -828,32 +821,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
-   droq_pkt2  = droq->recv_buf_list[next_idx];
-   info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
-  
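
The single-OR update of ih->u64 earlier in this diff packs the same three
fields that the removed ih->s.tlen/gsz/gather stores set individually. Below
is a minimal sketch of the packing it implies; the bit positions are inferred
from the shifts in the patch, not taken from the SDP hardware manual:

#include <stdint.h>

/* Sketch of the instruction-header packing, assuming:
 *   bit 62      gather-mode flag    (1ULL << 62)
 *   bits 48..61 gather list size    ((uint64_t)frags << 48)
 *   low bits    total length        (pkt_len + fsz)
 * A single 64-bit OR replaces three read-modify-write bitfield stores.
 */
static inline uint64_t
pack_gather_header(uint64_t ih, uint16_t frags, uint32_t pkt_len, uint32_t fsz)
{
        return ih | (1ULL << 62) | ((uint64_t)frags << 48) |
               (uint64_t)(pkt_len + fsz);
}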

[PATCH v5 3/3] net/octeon_ep: add new fastpath routines

2023-10-18 Thread Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint
devices and assigns the fastpath routines based on
the offload flags.

Patch also adds misc changes to improve performance
and code readability.

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/cnxk_ep_rx.c| 310 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 210 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|   2 +
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|   1 +
 drivers/net/octeon_ep/otx_ep_common.h | 125 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   |  93 +---
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 10 files changed, 706 insertions(+), 157 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c 
b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 00..22bf3ce7a7
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,310 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+   struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+   uint32_t refill_idx = droq->refill_idx;
+   struct rte_mbuf *buf;
+   uint32_t i;
+   int rc;
+
+   rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], 
count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return rc;
+   }
+
+   for (i = 0; i < count; i++) {
+   buf = recv_buf_list[refill_idx];
+   desc_ring[refill_idx].buffer_ptr = 
rte_mbuf_data_iova_default(buf);
+   refill_idx++;
+   }
+
+   droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, 
droq->nb_desc);
+   droq->refill_count -= count;
+
+   return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+   uint32_t desc_refilled = 0, count;
+   uint32_t nb_desc = droq->nb_desc;
+   uint32_t refill_idx = droq->refill_idx;
+   int rc;
+
+   if (unlikely(droq->read_idx == refill_idx))
+   return;
+
+   if (refill_idx < droq->read_idx) {
+   count = droq->read_idx - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled = count;
+   } else {
+   count = nb_desc - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+
+   desc_refilled = count;
+   count = droq->read_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled += count;
+   }
+
+   /* Flush the droq descriptor data to memory to be sure
+* that when we update the credits the data in memory is
+* accurate.
+*/
+   rte_io_wmb();
+   rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+   uint32_t new_pkts;
+   uint32_t val;
+
+   /* Batch subtractions from the HW counter to reduce PCIe traffic
+* This adds an extra local variable, but almost halves the
+* number of PCIe writes.
+*/
+   val = rte_atomic_load_explicit(droq->pkts_sent_ism, 
rte_memory_order_relaxed);
+   new_pkts = val - droq->pkts_sent_ism_prev;
+   droq->pkts_sent_ism_prev = val;
+
+   if (val > (uint32_t)(1 << 31)) {
+   /* Only subtract the packet count in the HW counter
+* when count above halfway to saturation.
+*/
+   rte_write64((uint64_t)val, droq->pkts_sent_reg);
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   while (rte_atomic_load_explicit(droq->pkts_sent_ism, 
rte_memory_order_relaxed) >=
+  val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   rte_mb();
+   }
+
+   droq->pkts_sent_ism_prev = 0;
+   }
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   droq->pkts_pending += new_pkts;
+
+   return new_pkts;
+}
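
cnxk_ep_rx_refill() above splits a refill that crosses the ring wrap point
into two contiguous chunks. Here is a standalone sketch of the same index
arithmetic, with a hypothetical refill_chunk() standing in for
cnxk_ep_rx_refill_mbuf():

#include <stdint.h>

/* Hypothetical helper: refill 'count' descriptors starting at 'start'. */
static void refill_chunk(uint32_t start, uint32_t count);

/* Refill the [refill_idx, read_idx) region in at most two contiguous
 * spans; assumes the caller already checked read_idx != refill_idx.
 */
static void
refill_ring(uint32_t read_idx, uint32_t *refill_idx, uint32_t nb_desc)
{
        if (*refill_idx < read_idx) {
                /* Free region is one contiguous span. */
                refill_chunk(*refill_idx, read_idx - *refill_idx);
        } else {
                /* Free region wraps: tail of the ring, then the head. */
                refill_chunk(*refill_idx, nb_desc - *refill_idx);
                refill_chunk(0, read_idx);
        }
        *refill_idx = read_idx;
}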

[PATCH v6 0/3] rewrite fastpath routines

2023-10-18 Thread Vamsi Attunuru
This series adds new fastpath routines for cn10k & cn9k endpoint
devices and supports 32B Tx descriptor format which improves the
performance.

v6 changes:
- Use __atomic_xxx built-ins to fix CI build

v5 changes:
- Series rebased

v4 changes:
- Use rte_atomic_xxx instead of __atomic_xxx built-ins

v2 & v3 changes:
- Fixed CI

Shijith Thotton (1):
  net/octeon_ep: support 32B IQ descriptor size

Vamsi Attunuru (2):
  net/octeon_ep: clean up receive routine
  net/octeon_ep: add new fastpath routines

 drivers/net/octeon_ep/cnxk_ep_rx.c| 309 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 209 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|  12 +-
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|  11 +-
 drivers/net/octeon_ep/otx_ep_common.h | 127 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 255 +++--
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 drivers/net/octeon_ep/otx_ep_vf.c |   8 +
 11 files changed, 801 insertions(+), 252 deletions(-)
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.c
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_tx.c

-- 
2.25.1



[PATCH v6 1/3] net/octeon_ep: support 32B IQ descriptor size

2023-10-18 Thread Vamsi Attunuru
From: Shijith Thotton 

Update input queue setup to consider descriptor size in driver conf.
The default instruction size for otx2 and cnxk devices has been updated
to 32 bytes.

Signed-off-by: Shijith Thotton 
---
 drivers/net/octeon_ep/cnxk_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx2_ep_vf.c| 10 +-
 drivers/net/octeon_ep/otx_ep_common.h |  4 
 drivers/net/octeon_ep/otx_ep_vf.c |  8 
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 92c2d2ca5c..7b3669fe0c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -106,6 +106,14 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(CNXK_EP_R_IN_CTL_IS_64B);
+   else
+   reg_val |= CNXK_EP_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + 
CNXK_EP_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + 
CNXK_EP_R_IN_INSTR_RSIZE(iq_no));
@@ -354,7 +362,7 @@ static const struct otx_ep_config default_cnxk_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c 
b/drivers/net/octeon_ep/otx2_ep_vf.c
index ced3a415a5..f72b8d25d7 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -256,6 +256,14 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (otx_ep->conf->iq.instr_type == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(SDP_VF_R_IN_CTL_IS_64B);
+   else
+   reg_val |= SDP_VF_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
oct_ep_write64(iq->base_addr_dma, otx_ep->hw_addr + 
SDP_VF_R_IN_INSTR_BADDR(iq_no));
oct_ep_write64(iq->nb_desc, otx_ep->hw_addr + 
SDP_VF_R_IN_INSTR_RSIZE(iq_no));
@@ -500,7 +508,7 @@ static const struct otx_ep_config default_otx2_ep_conf = {
/* IQ attributes */
.iq= {
.max_iqs   = OTX_EP_CFG_IO_QUEUES,
-   .instr_type= OTX_EP_64BYTE_INSTR,
+   .instr_type= OTX_EP_32BYTE_INSTR,
.pending_list_size = (OTX_EP_MAX_IQ_DESCRIPTORS *
  OTX_EP_CFG_IO_QUEUES),
},
diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index c150cbe619..90e059cad0 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,6 +11,7 @@
 
 #define OTX_EP_MAX_RINGS_PER_VF(8)
 #define OTX_EP_CFG_IO_QUEUESOTX_EP_MAX_RINGS_PER_VF
+#define OTX_EP_32BYTE_INSTR (32)
 #define OTX_EP_64BYTE_INSTR (64)
 /*
  * Backpressure for SDP is configured on Octeon, and the minimum queue sizes
@@ -215,6 +216,9 @@ struct otx_ep_instr_queue {
/* Number of  descriptors in this ring. */
uint32_t nb_desc;
 
+   /* Size of the descriptor. */
+   uint8_t desc_size;
+
/* Input ring index, where the driver should write the next packet */
uint32_t host_write_index;
 
diff --git a/drivers/net/octeon_ep/otx_ep_vf.c 
b/drivers/net/octeon_ep/otx_ep_vf.c
index 4f3538146b..236b7a874c 100644
--- a/drivers/net/octeon_ep/otx_ep_vf.c
+++ b/drivers/net/octeon_ep/otx_ep_vf.c
@@ -120,6 +120,14 @@ otx_ep_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
return -EIO;
}
 
+   /* Configure input queue instruction size. */
+   if (iq->desc_size == OTX_EP_32BYTE_INSTR)
+   reg_val &= ~(OTX_EP_R_IN_CTL_IS_64B);
+   else
+   reg_val |= OTX_EP_R_IN_CTL_IS_64B;
+   oct_ep_write64(reg_val, otx_ep->hw_addr + OTX_EP_R_IN_CONTROL(iq_no));
+   iq->desc_size = otx_ep->conf->iq.instr_type;
+
/* Write the start of the input queue's ring and its size  */
otx_ep_write64(iq->base_addr_dma, otx_ep->hw_addr,
   OTX_EP_R_IN_INSTR_BADDR(iq_no));

[PATCH v6 2/3] net/octeon_ep: clean up receive routine

2023-10-18 Thread Vamsi Attunuru
Patch improves the Rx routine and the packet count update
routines; the packet count update routines need to drain
in-flight ISM memory updates while decrementing the packet
count register.

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/otx_ep_rxtx.c | 162 
 1 file changed, 68 insertions(+), 94 deletions(-)

diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c 
b/drivers/net/octeon_ep/otx_ep_rxtx.c
index b37fc8109f..4c509a419f 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -442,7 +442,14 @@ otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
 * when count above halfway to saturation.
 */
rte_write32(val, iq->inst_cnt_reg);
-   *iq->inst_cnt_ism = 0;
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= 
val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
+   rte_mb();
+   }
+
iq->inst_cnt_ism_prev = 0;
}
rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
@@ -567,9 +574,7 @@ prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, 
struct rte_mbuf *m, uint
 
finfo = &iq->req_list[iq->host_write_index].finfo;
*dptr = rte_mem_virt2iova(finfo->g.sg);
-   ih->s.tlen = pkt_len + ih->s.fsz;
-   ih->s.gsz = frags;
-   ih->s.gather = 1;
+   ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + 
ih->s.fsz));
 
while (frags--) {
finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
@@ -752,36 +757,26 @@ otx2_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, 
uint16_t nb_pkts)
 static uint32_t
 otx_ep_droq_refill(struct otx_ep_droq *droq)
 {
-   struct otx_ep_droq_desc *desc_ring;
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
struct otx_ep_droq_info *info;
struct rte_mbuf *buf = NULL;
uint32_t desc_refilled = 0;
 
-   desc_ring = droq->desc_ring;
-
while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
-   /* If a valid buffer exists (happens if there is no dispatch),
-* reuse the buffer, else allocate.
-*/
-   if (droq->recv_buf_list[droq->refill_idx] != NULL)
-   break;
-
buf = rte_pktmbuf_alloc(droq->mpool);
/* If a buffer could not be allocated, no point in
 * continuing
 */
-   if (buf == NULL) {
+   if (unlikely(!buf)) {
droq->stats.rx_alloc_failure++;
break;
}
info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
-   memset(info, 0, sizeof(*info));
+   info->length = 0;
 
droq->recv_buf_list[droq->refill_idx] = buf;
desc_ring[droq->refill_idx].buffer_ptr =
rte_mbuf_data_iova_default(buf);
-
-
droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
droq->nb_desc);
 
@@ -793,21 +788,18 @@ otx_ep_droq_refill(struct otx_ep_droq *droq)
 }
 
 static struct rte_mbuf *
-otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
-   struct otx_ep_droq *droq, int next_fetch)
+otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq 
*droq, int next_fetch)
 {
volatile struct otx_ep_droq_info *info;
-   struct rte_mbuf *droq_pkt2 = NULL;
-   struct rte_mbuf *droq_pkt = NULL;
-   struct rte_net_hdr_lens hdr_lens;
-   struct otx_ep_droq_info *info2;
+   struct rte_mbuf *mbuf_next = NULL;
+   struct rte_mbuf *mbuf = NULL;
uint64_t total_pkt_len;
uint32_t pkt_len = 0;
int next_idx;
 
-   droq_pkt  = droq->recv_buf_list[droq->read_idx];
-   droq_pkt2  = droq->recv_buf_list[droq->read_idx];
-   info = rte_pktmbuf_mtod(droq_pkt, struct otx_ep_droq_info *);
+   mbuf = droq->recv_buf_list[droq->read_idx];
+   info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
+
/* make sure info is available */
rte_rmb();
if (unlikely(!info->length)) {
@@ -828,32 +820,25 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
assert(0);
}
}
+
if (next_fetch) {
next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
-   droq_pkt2  = droq->recv_buf_list[next_idx];
-   info2 = rte_pktmbuf_mtod(droq_pkt2, struct otx_ep_droq_info *);
-   rte_prefetch_non_temporal((const 

[PATCH v6 3/3] net/octeon_ep: add new fastpath routines

2023-10-18 Thread Vamsi Attunuru
Adds new fastpath routines for cn10k & cn9k endpoint
devices and assigns the fastpath routines based on
the offload flags.

Patch also adds misc changes to improve performance
and code readability.

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/cnxk_ep_rx.c| 309 ++
 drivers/net/octeon_ep/cnxk_ep_tx.c| 209 +
 drivers/net/octeon_ep/cnxk_ep_vf.c|   2 +
 drivers/net/octeon_ep/cnxk_ep_vf.h|  13 ++
 drivers/net/octeon_ep/meson.build |   2 +
 drivers/net/octeon_ep/otx2_ep_vf.c|   1 +
 drivers/net/octeon_ep/otx_ep_common.h | 125 ++-
 drivers/net/octeon_ep/otx_ep_ethdev.c |  69 +-
 drivers/net/octeon_ep/otx_ep_rxtx.c   |  93 +---
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  38 +++-
 10 files changed, 704 insertions(+), 157 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c 
b/drivers/net/octeon_ep/cnxk_ep_rx.c
new file mode 100644
index 00..74f0011283
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "otx_ep_common.h"
+#include "otx2_ep_vf.h"
+#include "otx_ep_rxtx.h"
+
+static inline int
+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
+{
+   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
+   struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+   uint32_t refill_idx = droq->refill_idx;
+   struct rte_mbuf *buf;
+   uint32_t i;
+   int rc;
+
+   rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], 
count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return rc;
+   }
+
+   for (i = 0; i < count; i++) {
+   buf = recv_buf_list[refill_idx];
+   desc_ring[refill_idx].buffer_ptr = 
rte_mbuf_data_iova_default(buf);
+   refill_idx++;
+   }
+
+   droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, 
droq->nb_desc);
+   droq->refill_count -= count;
+
+   return 0;
+}
+
+static inline void
+cnxk_ep_rx_refill(struct otx_ep_droq *droq)
+{
+   uint32_t desc_refilled = 0, count;
+   uint32_t nb_desc = droq->nb_desc;
+   uint32_t refill_idx = droq->refill_idx;
+   int rc;
+
+   if (unlikely(droq->read_idx == refill_idx))
+   return;
+
+   if (refill_idx < droq->read_idx) {
+   count = droq->read_idx - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled = count;
+   } else {
+   count = nb_desc - refill_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+
+   desc_refilled = count;
+   count = droq->read_idx;
+   rc = cnxk_ep_rx_refill_mbuf(droq, count);
+   if (unlikely(rc)) {
+   droq->stats.rx_alloc_failure++;
+   return;
+   }
+   desc_refilled += count;
+   }
+
+   /* Flush the droq descriptor data to memory to be sure
+* that when we update the credits the data in memory is
+* accurate.
+*/
+   rte_io_wmb();
+   rte_write32(desc_refilled, droq->pkts_credit_reg);
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+{
+   uint32_t new_pkts;
+   uint32_t val;
+
+   /* Batch subtractions from the HW counter to reduce PCIe traffic
+* This adds an extra local variable, but almost halves the
+* number of PCIe writes.
+*/
+   val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
+   new_pkts = val - droq->pkts_sent_ism_prev;
+   droq->pkts_sent_ism_prev = val;
+
+   if (val > (uint32_t)(1 << 31)) {
+   /* Only subtract the packet count in the HW counter
+* when count above halfway to saturation.
+*/
+   rte_write64((uint64_t)val, droq->pkts_sent_reg);
+   rte_mb();
+
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) 
>= val) {
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   rte_mb();
+   }
+
+   droq->pkts_sent_ism_prev = 0;
+   }
+   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
+   droq->pkts_pending += new_pkts;
+
+   return new_pkts;
+}
+
+static inline int16_t __rte_hot
+cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
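
The new_pkts = val - pkts_sent_ism_prev computation above relies on unsigned
subtraction being modulo 2^32, so the delta stays correct even when the ISM
counter wraps. A self-contained check of that property:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
        uint32_t prev = 4294967290u; /* a few counts below UINT32_MAX */
        uint32_t val = prev + 10;    /* wraps around to 4 */

        /* (val - prev) mod 2^32 still yields the number of increments. */
        assert((uint32_t)(val - prev) == 10);
        return 0;
}

This is also why the counter is only rewound once it passes the halfway
point: the delta remains unambiguous as long as fewer than 2^32 packets
arrive between polls.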

[RFC] dmadev: add QoS capability

2024-07-29 Thread Vamsi Attunuru
Some DMA controllers support QoS at the HW command queue level to
differentiate performance across HW queues based on the configured
priority. Patch adds the required fields in the dmadev structures to
expose the hardware-supported priority levels and to let applications
configure the priority.

Signed-off-by: Vamsi Attunuru 
---
 lib/dmadev/rte_dmadev.c | 10 ++
 lib/dmadev/rte_dmadev.h | 19 +++
 2 files changed, 29 insertions(+)

diff --git a/lib/dmadev/rte_dmadev.c b/lib/dmadev/rte_dmadev.c
index 845727210f..9ff62efcb4 100644
--- a/lib/dmadev/rte_dmadev.c
+++ b/lib/dmadev/rte_dmadev.c
@@ -491,6 +491,16 @@ rte_dma_configure(int16_t dev_id, const struct 
rte_dma_conf *dev_conf)
"Device %d configure too many vchans", dev_id);
return -EINVAL;
}
+   if (dev_conf->priority &&
+   !(dev_info.dev_capa & RTE_DMA_CAPA_QOS)) {
+   RTE_DMA_LOG(ERR, "Device %d don't support QoS", dev_id);
+   return -EINVAL;
+   }
+   if (dev_conf->priority >= dev_info.nb_priorities) {
+   RTE_DMA_LOG(ERR,
+   "Device %d configure invalid priority", dev_id);
+   return -EINVAL;
+   }
if (dev_conf->enable_silent &&
!(dev_info.dev_capa & RTE_DMA_CAPA_SILENT)) {
RTE_DMA_LOG(ERR, "Device %d don't support silent", dev_id);
diff --git a/lib/dmadev/rte_dmadev.h b/lib/dmadev/rte_dmadev.h
index 5474a5281d..08db8ead0a 100644
--- a/lib/dmadev/rte_dmadev.h
+++ b/lib/dmadev/rte_dmadev.h
@@ -268,6 +268,16 @@ int16_t rte_dma_next_dev(int16_t start_dev_id);
 #define RTE_DMA_CAPA_OPS_COPY_SG   RTE_BIT64(33)
 /** Support fill operation. */
 #define RTE_DMA_CAPA_OPS_FILL  RTE_BIT64(34)
+/** Support QoS at DMA HW channel level
+ *
+ * If device supports QoS then application could configure priority to the
+ * DMA HW channel using 'priority' field in struct rte_dma_conf. Number of
+ * supported priority levels will be known from 'nb_priorities' field in
+ * struct rte_dma_info.
+ * DMA devices which support QoS at HW channel level can advertise this
+ * capability.
+ */
+#define RTE_DMA_CAPA_QOS   RTE_BIT64(35)
 /**@}*/
 
 /**
@@ -297,6 +307,8 @@ struct rte_dma_info {
int16_t numa_node;
/** Number of virtual DMA channel configured. */
uint16_t nb_vchans;
+   /** Number of priority levels supported by DMA HW channel. */
+   uint16_t nb_priorities;
 };
 
 /**
@@ -332,6 +344,13 @@ struct rte_dma_conf {
 * @see RTE_DMA_CAPA_SILENT
 */
bool enable_silent;
+   /* The priority of the DMA HW channel.
+* This value cannot be greater than or equal to the field 
'nb_priorities'
+* of struct rte_dma_info which get from rte_dma_info_get().
+* Among the values between '0' and 'nb_priorities - 1', lowest value
+* indicates higher priority and vice-versa.
+*/
+   uint16_t priority;
 };
 
 /**
-- 
2.25.1
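
A minimal usage sketch for the proposed fields; it assumes a driver that
advertises RTE_DMA_CAPA_QOS, and note that the nb_priorities and priority
members exist only with this RFC applied:

#include <rte_dmadev.h>

static int
configure_highest_priority(int16_t dev_id)
{
        struct rte_dma_info info;
        struct rte_dma_conf conf = { .nb_vchans = 1 };

        if (rte_dma_info_get(dev_id, &info) != 0)
                return -1;

        /* Per this RFC, the lowest value is the highest priority. */
        if ((info.dev_capa & RTE_DMA_CAPA_QOS) && info.nb_priorities > 0)
                conf.priority = 0;

        return rte_dma_configure(dev_id, &conf);
}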



[PATCH] doc: announce dmadev new capability addition

2024-07-29 Thread Vamsi Attunuru
Announce addition of new capability flag and fields in
rte_dma_info and rte_dma_conf structures.

Signed-off-by: Vamsi Attunuru 
---
RFC:
https://patchwork.dpdk.org/project/dpdk/patch/20240729115558.263574-1-vattun...@marvell.com/

 doc/guides/rel_notes/deprecation.rst | 5 +
 1 file changed, 5 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 6948641ff6..05d28473c0 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -147,3 +147,8 @@ Deprecation Notices
   will be deprecated and subsequently removed in DPDK 24.11 release.
   Before this, the new port library API (functions rte_swx_port_*)
   will gradually transition from experimental to stable status.
+
+* dmadev: A new flag ``RTE_DMA_CAPA_QOS`` will be introduced to advertise
+  a DMA device's QoS capability. New fields will also be added in the
+  ``rte_dma_info`` and ``rte_dma_conf`` structures to report the device
+  supported priority levels and to configure the required priority level.
-- 
2.25.1



[PATCH] net/octeon_ep: extend mailbox functionality

2024-08-06 Thread Vamsi Attunuru
Patch extends mbox functionality to handle PF-to-VF mbox
messages and also updates the current mbox version to V3.

As part of the PF FLR notify event, the event handler invokes
the device removal event callback to tear down the driver.

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/cnxk_ep_vf.h|  5 ++
 drivers/net/octeon_ep/otx_ep_ethdev.c |  4 +-
 drivers/net/octeon_ep/otx_ep_mbox.c   | 70 +--
 drivers/net/octeon_ep/otx_ep_mbox.h   | 11 -
 4 files changed, 82 insertions(+), 8 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h 
b/drivers/net/octeon_ep/cnxk_ep_vf.h
index 41d8fbbb3a..8981dd7e86 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -134,6 +134,9 @@
 #define CNXK_EP_R_MBOX_VF_PF_DATA(ring)  \
(CNXK_EP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_EP_RING_OFFSET))
 
+#define CNXK_EP_R_MBOX_PF_VF_DATA(ring)  \
+   (CNXK_EP_R_MBOX_PF_VF_DATA_START + ((ring) * CNXK_EP_RING_OFFSET))
+
 #define CNXK_EP_R_MBOX_PF_VF_INT(ring)   \
(CNXK_EP_R_MBOX_PF_VF_INT_START + ((ring) * CNXK_EP_RING_OFFSET))
 
@@ -195,5 +198,7 @@ struct cnxk_ep_instr_32B {
 #define CNXK_EP_OQ_ISM_OFFSET(queue)(RTE_CACHE_LINE_SIZE * (queue))
 #define CNXK_EP_ISM_EN  (0x1)
 #define CNXK_EP_ISM_MSIX_DIS(0x2)
+#define CNXK_EP_MBOX_INTR   (0x1)
+#define CNXK_EP_MBOX_ENAB   (0x2)
 
 #endif /*_CNXK_EP_VF_H_ */
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c 
b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 46211361a0..196ed69123 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -656,6 +656,7 @@ otx_ep_dev_close(struct rte_eth_dev *eth_dev)
 
otx_epvf = OTX_EP_DEV(eth_dev);
otx_ep_mbox_send_dev_exit(eth_dev);
+   otx_ep_mbox_uninit(eth_dev);
otx_epvf->fn_list.disable_io_queues(otx_epvf);
num_queues = otx_epvf->nb_rx_queues;
for (q_no = 0; q_no < num_queues; q_no++) {
@@ -725,6 +726,7 @@ otx_ep_eth_dev_uninit(struct rte_eth_dev *eth_dev)
return 0;
}
 
+   otx_ep_mbox_uninit(eth_dev);
eth_dev->dev_ops = NULL;
eth_dev->rx_pkt_burst = NULL;
eth_dev->tx_pkt_burst = NULL;
@@ -826,7 +828,7 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
return -EINVAL;
}
 
-   if (otx_ep_mbox_version_check(eth_dev))
+   if (otx_ep_mbox_init(eth_dev))
return -EINVAL;
 
if (otx_ep_eth_dev_query_set_vf_mac(eth_dev,
diff --git a/drivers/net/octeon_ep/otx_ep_mbox.c 
b/drivers/net/octeon_ep/otx_ep_mbox.c
index 4118645dc7..0474419599 100644
--- a/drivers/net/octeon_ep/otx_ep_mbox.c
+++ b/drivers/net/octeon_ep/otx_ep_mbox.c
@@ -17,7 +17,10 @@
  * with new command and its version info.
  */
 static uint32_t otx_ep_cmd_versions[OTX_EP_MBOX_CMD_MAX] = {
-   [0 ... OTX_EP_MBOX_CMD_DEV_REMOVE] = OTX_EP_MBOX_VERSION_V1
+   [0 ... OTX_EP_MBOX_CMD_DEV_REMOVE] = OTX_EP_MBOX_VERSION_V1,
+   [OTX_EP_MBOX_CMD_GET_FW_INFO ... OTX_EP_MBOX_NOTIF_LINK_STATUS] = 
OTX_EP_MBOX_VERSION_V2,
+   [OTX_EP_MBOX_NOTIF_PF_FLR] = OTX_EP_MBOX_VERSION_V3
+
 };
 
 static int
@@ -288,10 +291,9 @@ otx_ep_mbox_get_max_pkt_len(struct rte_eth_dev *eth_dev)
return rsp.s_get_mtu.mtu;
 }
 
-int otx_ep_mbox_version_check(struct rte_eth_dev *eth_dev)
+static void
+otx_ep_mbox_version_check(struct otx_ep_device *otx_ep)
 {
-   struct otx_ep_device *otx_ep =
-   (struct otx_ep_device *)(eth_dev)->data->dev_private;
union otx_ep_mbox_word cmd;
union otx_ep_mbox_word rsp;
int ret;
@@ -312,15 +314,73 @@ int otx_ep_mbox_version_check(struct rte_eth_dev *eth_dev)
if (ret == OTX_EP_MBOX_CMD_STATUS_NACK || rsp.s_version.version == 0) {
otx_ep_dbg("VF Mbox version fallback to base version from:%u\n",
(uint32_t)cmd.s_version.version);
-   return 0;
+   return;
}
otx_ep->mbox_neg_ver = (uint32_t)rsp.s_version.version;
otx_ep_dbg("VF Mbox version:%u Negotiated VF version with PF:%u\n",
(uint32_t)cmd.s_version.version,
(uint32_t)rsp.s_version.version);
+}
+
+static void
+otx_ep_mbox_intr_handler(void *param)
+{
+   struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)param;
+   struct otx_ep_device *otx_ep = (struct otx_ep_device 
*)eth_dev->data->dev_private;
+   struct rte_pci_device *pdev = RTE_ETH_DEV_TO_PCI(eth_dev);
+   union otx_ep_mbox_word mbox_cmd;
+
+   if (otx2_read64(otx_ep->hw_addr + CNXK_EP_R_MBOX_PF_VF_INT(0)) & 
CNXK_EP_MBOX_INTR) {
+   mbox_cmd.u64 = otx2_read64(otx_ep->hw_addr + 
CNXK_EP_R_MBOX_PF_VF_DATA(0));
+   otx2_write64(CNXK_EP_MBOX_ENAB | CNXK_EP_MBOX_INTR,
+otx_ep->hw_add
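
On the application side, the teardown triggered by the PF FLR notification
shows up as an ordinary ethdev removal event. A sketch of how an application
might subscribe to it; the callback body is purely illustrative:

#include <stdio.h>
#include <rte_common.h>
#include <rte_ethdev.h>

static int
dev_removal_cb(uint16_t port_id, enum rte_eth_event_type event,
               void *cb_arg, void *ret_param)
{
        RTE_SET_USED(event);
        RTE_SET_USED(cb_arg);
        RTE_SET_USED(ret_param);
        printf("port %u removed, stopping I/O\n", port_id);
        return 0;
}

/* During init:
 *   rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_RMV,
 *                                 dev_removal_cb, NULL);
 */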

[PATCH 1/1] doc: notice to add new ipsec event subtypes

2022-06-24 Thread Vamsi Attunuru
Based on the discussion in the email thread below, the new event
subtypes can be added in the 22.11 release after fixing any
compatibility issues mentioned in the thread.

https://patches.dpdk.org/project/dpdk/patch/20220416192530.173895-8-gak...@marvell.com/

Signed-off-by: Vamsi Attunuru 
---
 doc/guides/rel_notes/deprecation.rst | 5 +
 1 file changed, 5 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 4e5b23c53d..83da1c62ac 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -107,6 +107,11 @@ Deprecation Notices
   alternative is implemented.
   The legacy actions should be removed in DPDK 22.11.
 
+* ethdev: The enum ``rte_eth_event_ipsec_subtype`` would be extended to add
+  new subtype values ``RTE_ETH_EVENT_IPSEC_SA_PKT_EXPIRY``,
+  ``RTE_ETH_EVENT_IPSEC_SA_BYTE_HARD_EXPIRY`` and
+  ``RTE_ETH_EVENT_IPSEC_SA_PKT_HARD_EXPIRY`` in DPDK 22.11.
+
 * cryptodev: Hide structures ``rte_cryptodev_sym_session`` and
   ``rte_cryptodev_asym_session`` to remove unnecessary indirection between
   session and the private data of session. An opaque pointer can be exposed
-- 
2.25.1



[PATCH v2 1/1] doc: announce addition of new ipsec event subtypes

2022-06-26 Thread Vamsi Attunuru
New event subtypes need to be added for notifying expiry events
upon reaching IPsec SA soft packet expiry and hard packet/byte
expiry limits. This would be added in DPDK 22.11.

Signed-off-by: Vamsi Attunuru 
Acked-by: Akhil Goyal 
---
More details on the new event subtype proposal discussion are in the email thread below.
https://patches.dpdk.org/project/dpdk/patch/20220416192530.173895-8-gak...@marvell.com/

v2: rephrase title and git log.
---
 doc/guides/rel_notes/deprecation.rst | 5 +
 1 file changed, 5 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 4e5b23c53d..83da1c62ac 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -107,6 +107,11 @@ Deprecation Notices
   alternative is implemented.
   The legacy actions should be removed in DPDK 22.11.
 
+* ethdev: The enum ``rte_eth_event_ipsec_subtype`` would be extended to add
+  new subtype values ``RTE_ETH_EVENT_IPSEC_SA_PKT_EXPIRY``,
+  ``RTE_ETH_EVENT_IPSEC_SA_BYTE_HARD_EXPIRY`` and
+  ``RTE_ETH_EVENT_IPSEC_SA_PKT_HARD_EXPIRY`` in DPDK 22.11.
+
 * cryptodev: Hide structures ``rte_cryptodev_sym_session`` and
   ``rte_cryptodev_asym_session`` to remove unnecessary indirection between
   session and the private data of session. An opaque pointer can be exposed
-- 
2.25.1



[PATCH 1/1] net/cnxk: enable 3des-cbc capability

2022-04-29 Thread Vamsi Attunuru
Patch enables 3DES-CBC capability of inline crypto device.

Signed-off-by: Vamsi Attunuru 
---
 drivers/common/cnxk/cnxk_security.c |  3 +++
 drivers/net/cnxk/cn10k_ethdev_sec.c | 20 
 2 files changed, 23 insertions(+)

diff --git a/drivers/common/cnxk/cnxk_security.c 
b/drivers/common/cnxk/cnxk_security.c
index ec808c0033..8ea0aea7c2 100644
--- a/drivers/common/cnxk/cnxk_security.c
+++ b/drivers/common/cnxk/cnxk_security.c
@@ -129,6 +129,9 @@ ot_ipsec_sa_common_param_fill(union roc_ot_ipsec_sa_word2 
*w2,
case RTE_CRYPTO_CIPHER_AES_CTR:
w2->s.enc_type = ROC_IE_OT_SA_ENC_AES_CTR;
break;
+   case RTE_CRYPTO_CIPHER_3DES_CBC:
+   w2->s.enc_type = ROC_IE_OT_SA_ENC_3DES_CBC;
+   break;
default:
return -ENOTSUP;
}
diff --git a/drivers/net/cnxk/cn10k_ethdev_sec.c 
b/drivers/net/cnxk/cn10k_ethdev_sec.c
index 87bb691ab4..d0463b3622 100644
--- a/drivers/net/cnxk/cn10k_ethdev_sec.c
+++ b/drivers/net/cnxk/cn10k_ethdev_sec.c
@@ -62,6 +62,26 @@ static struct rte_cryptodev_capabilities 
cn10k_eth_sec_crypto_caps[] = {
}, }
}, }
},
+   {   /* 3DES CBC */
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+   {.sym = {
+   .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+   {.cipher = {
+   .algo = RTE_CRYPTO_CIPHER_3DES_CBC,
+   .block_size = 8,
+   .key_size = {
+   .min = 24,
+   .max = 24,
+   .increment = 0
+   },
+   .iv_size = {
+   .min = 8,
+   .max = 16,
+   .increment = 8
+   }
+   }, }
+   }, }
+   },
{   /* SHA1 HMAC */
.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
{.sym = {
-- 
2.25.1



[PATCH 1/1] crypto/cnxk: enable 3des-cbc secure capability

2022-04-29 Thread Vamsi Attunuru
Patch enables 3DES-CBC secure capability of crypto device.

Signed-off-by: Vamsi Attunuru 
---
 drivers/crypto/cnxk/cnxk_cryptodev.h  |  2 +-
 .../crypto/cnxk/cnxk_cryptodev_capabilities.c | 24 +++
 drivers/crypto/cnxk/cnxk_ipsec.h  |  3 ++-
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/cnxk/cnxk_cryptodev.h 
b/drivers/crypto/cnxk/cnxk_cryptodev.h
index b75d681185..8870021725 100644
--- a/drivers/crypto/cnxk/cnxk_cryptodev.h
+++ b/drivers/crypto/cnxk/cnxk_cryptodev.h
@@ -11,7 +11,7 @@
 #include "roc_cpt.h"
 
 #define CNXK_CPT_MAX_CAPS   35
-#define CNXK_SEC_CRYPTO_MAX_CAPS 12
+#define CNXK_SEC_CRYPTO_MAX_CAPS 13
 #define CNXK_SEC_MAX_CAPS   9
 #define CNXK_AE_EC_ID_MAX   8
 /**
diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c 
b/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c
index 98b002d93a..ba9eaf2325 100644
--- a/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c
+++ b/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c
@@ -862,6 +862,29 @@ static const struct rte_cryptodev_capabilities 
sec_caps_aes[] = {
},
 };
 
+static const struct rte_cryptodev_capabilities sec_caps_des[] = {
+   {   /* 3DES CBC */
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+   {.sym = {
+   .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+   {.cipher = {
+   .algo = RTE_CRYPTO_CIPHER_3DES_CBC,
+   .block_size = 8,
+   .key_size = {
+   .min = 24,
+   .max = 24,
+   .increment = 0
+   },
+   .iv_size = {
+   .min = 8,
+   .max = 16,
+   .increment = 8
+   }
+   }, }
+   }, }
+   }
+};
+
 static const struct rte_cryptodev_capabilities sec_caps_sha1_sha2[] = {
{   /* SHA1 HMAC */
.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
@@ -1195,6 +1218,7 @@ sec_crypto_caps_populate(struct 
rte_cryptodev_capabilities cnxk_caps[],
int cur_pos = 0;
 
SEC_CAPS_ADD(cnxk_caps, &cur_pos, hw_caps, aes);
+   SEC_CAPS_ADD(cnxk_caps, &cur_pos, hw_caps, des);
SEC_CAPS_ADD(cnxk_caps, &cur_pos, hw_caps, sha1_sha2);
 
if (roc_model_is_cn10k())
diff --git a/drivers/crypto/cnxk/cnxk_ipsec.h b/drivers/crypto/cnxk/cnxk_ipsec.h
index 171ea2774e..7c7833ac1b 100644
--- a/drivers/crypto/cnxk/cnxk_ipsec.h
+++ b/drivers/crypto/cnxk/cnxk_ipsec.h
@@ -24,7 +24,8 @@ ipsec_xform_cipher_verify(struct rte_crypto_sym_xform 
*crypto_xform)
return 0;
 
if (crypto_xform->cipher.algo == RTE_CRYPTO_CIPHER_AES_CBC ||
-   crypto_xform->cipher.algo == RTE_CRYPTO_CIPHER_AES_CTR) {
+   crypto_xform->cipher.algo == RTE_CRYPTO_CIPHER_AES_CTR ||
+   crypto_xform->cipher.algo == RTE_CRYPTO_CIPHER_3DES_CBC) {
switch (crypto_xform->cipher.key.length) {
case 16:
case 24:
-- 
2.25.1



[PATCH v2 1/1] crypto/cnxk: enable 3des-cbc secure capability

2022-05-02 Thread Vamsi Attunuru
Patch enables 3DES-CBC secure capability of crypto device.

Signed-off-by: Vamsi Attunuru 
Acked-by: Akhil Goyal 
---
v2: add separate key_len check for 3DES
---
 drivers/crypto/cnxk/cnxk_cryptodev.h  |  2 +-
 .../crypto/cnxk/cnxk_cryptodev_capabilities.c | 24 +++
 drivers/crypto/cnxk/cnxk_ipsec.h  |  4 
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/cnxk/cnxk_cryptodev.h 
b/drivers/crypto/cnxk/cnxk_cryptodev.h
index b75d681185..8870021725 100644
--- a/drivers/crypto/cnxk/cnxk_cryptodev.h
+++ b/drivers/crypto/cnxk/cnxk_cryptodev.h
@@ -11,7 +11,7 @@
 #include "roc_cpt.h"
 
 #define CNXK_CPT_MAX_CAPS   35
-#define CNXK_SEC_CRYPTO_MAX_CAPS 12
+#define CNXK_SEC_CRYPTO_MAX_CAPS 13
 #define CNXK_SEC_MAX_CAPS   9
 #define CNXK_AE_EC_ID_MAX   8
 /**
diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c 
b/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c
index 98b002d93a..ba9eaf2325 100644
--- a/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c
+++ b/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c
@@ -862,6 +862,29 @@ static const struct rte_cryptodev_capabilities 
sec_caps_aes[] = {
},
 };
 
+static const struct rte_cryptodev_capabilities sec_caps_des[] = {
+   {   /* 3DES CBC */
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+   {.sym = {
+   .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+   {.cipher = {
+   .algo = RTE_CRYPTO_CIPHER_3DES_CBC,
+   .block_size = 8,
+   .key_size = {
+   .min = 24,
+   .max = 24,
+   .increment = 0
+   },
+   .iv_size = {
+   .min = 8,
+   .max = 16,
+   .increment = 8
+   }
+   }, }
+   }, }
+   }
+};
+
 static const struct rte_cryptodev_capabilities sec_caps_sha1_sha2[] = {
{   /* SHA1 HMAC */
.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
@@ -1195,6 +1218,7 @@ sec_crypto_caps_populate(struct 
rte_cryptodev_capabilities cnxk_caps[],
int cur_pos = 0;
 
SEC_CAPS_ADD(cnxk_caps, &cur_pos, hw_caps, aes);
+   SEC_CAPS_ADD(cnxk_caps, &cur_pos, hw_caps, des);
SEC_CAPS_ADD(cnxk_caps, &cur_pos, hw_caps, sha1_sha2);
 
if (roc_model_is_cn10k())
diff --git a/drivers/crypto/cnxk/cnxk_ipsec.h b/drivers/crypto/cnxk/cnxk_ipsec.h
index 171ea2774e..07ab2cf4ee 100644
--- a/drivers/crypto/cnxk/cnxk_ipsec.h
+++ b/drivers/crypto/cnxk/cnxk_ipsec.h
@@ -36,6 +36,10 @@ ipsec_xform_cipher_verify(struct rte_crypto_sym_xform 
*crypto_xform)
return 0;
}
 
+   if (crypto_xform->cipher.algo == RTE_CRYPTO_CIPHER_3DES_CBC &&
+   crypto_xform->cipher.key.length == 24)
+   return 0;
+
return -ENOTSUP;
 }
 
-- 
2.25.1
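
For reference, a session built against this capability would carry a cipher
transform shaped roughly like the sketch below; the key and IV values are
placeholders, with lengths matching the capability table above:

#include <rte_crypto_sym.h>

static uint8_t cipher_key[24]; /* 3DES-CBC requires a 24-byte key */

static struct rte_crypto_sym_xform cipher_xform = {
        .type = RTE_CRYPTO_SYM_XFORM_CIPHER,
        .cipher = {
                .op = RTE_CRYPTO_CIPHER_OP_ENCRYPT,
                .algo = RTE_CRYPTO_CIPHER_3DES_CBC,
                .key = { .data = cipher_key, .length = 24 },
                /* iv.offset is a placeholder offset into the crypto op. */
                .iv = { .offset = 16, .length = 8 }, /* caps allow 8 or 16 */
        },
};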



[PATCH v3 0/3] node: Introduce kernel_rx & kernel_tx nodes

2023-06-02 Thread Vamsi Attunuru
This patch set introduces two new nodes to transmit & receive packets
from the kernel. These nodes can be used for exception path handling,
to forward control plane traffic to the kernel, or to receive traffic
from the kernel stack.

V3:
* Address review comments
* Drop test-graph application from the patch set

V2:
* Handle error checks in testgraph application
* Extend supported test node patterns
* Fix warnings

Vamsi Attunuru (3):
  node/kernel_tx: support packet transmit to kernel
  node/kernel_rx: support receiving packets from kernel
  node/ethdev_rx: remove hardcoded node next details

 doc/guides/prog_guide/graph_lib.rst |  17 ++
 lib/node/ethdev_ctrl.c  |   1 +
 lib/node/ethdev_rx.c|   3 -
 lib/node/kernel_rx.c| 276 
 lib/node/kernel_rx_priv.h   |  48 +
 lib/node/kernel_tx.c| 122 
 lib/node/kernel_tx_priv.h   |  16 ++
 lib/node/meson.build|   2 +
 8 files changed, 482 insertions(+), 3 deletions(-)
 create mode 100644 lib/node/kernel_rx.c
 create mode 100644 lib/node/kernel_rx_priv.h
 create mode 100644 lib/node/kernel_tx.c
 create mode 100644 lib/node/kernel_tx_priv.h

-- 
2.25.1



[PATCH v3 1/3] node/kernel_tx: support packet transmit to kernel

2023-06-02 Thread Vamsi Attunuru
Patch adds a node to transmit the packets to kernel over
a raw socket.

Signed-off-by: Vamsi Attunuru 
---
 doc/guides/prog_guide/graph_lib.rst |   9 ++
 lib/node/kernel_tx.c| 122 
 lib/node/kernel_tx_priv.h   |  16 
 lib/node/meson.build|   1 +
 4 files changed, 148 insertions(+)

diff --git a/doc/guides/prog_guide/graph_lib.rst 
b/doc/guides/prog_guide/graph_lib.rst
index 1cfdc86433..fa22b014f3 100644
--- a/doc/guides/prog_guide/graph_lib.rst
+++ b/doc/guides/prog_guide/graph_lib.rst
@@ -392,3 +392,12 @@ null
 
 This node ignores the set of objects passed to it and reports that all are
 processed.
+
+kernel_tx
+~
+This node is an exit node that forwards packets to the kernel. It can be used
+to forward any control plane traffic from DPDK to the kernel stack. It uses a
+raw socket interface to transmit the packets; it places the packet's
+destination IP address in a sockaddr_in structure and uses the ``sendto``
+function to send data on the raw socket. After sending the burst of packets
+to the kernel, this node frees up the packet buffers.
diff --git a/lib/node/kernel_tx.c b/lib/node/kernel_tx.c
new file mode 100644
index 00..27d1808c71
--- /dev/null
+++ b/lib/node/kernel_tx.c
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell International Ltd.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "kernel_tx_priv.h"
+#include "node_private.h"
+
+static __rte_always_inline void
+kernel_tx_process_mbuf(struct rte_node *node, struct rte_mbuf **mbufs, 
uint16_t cnt)
+{
+   kernel_tx_node_ctx_t *ctx = (kernel_tx_node_ctx_t *)node->ctx;
+   struct sockaddr_in sin = {0};
+   struct rte_ipv4_hdr *ip4;
+   size_t len;
+   char *buf;
+   int i;
+
+   for (i = 0; i < cnt; i++) {
+   ip4 = rte_pktmbuf_mtod(mbufs[i], struct rte_ipv4_hdr *);
+   len = rte_pktmbuf_data_len(mbufs[i]);
+   buf = (char *)ip4;
+
+   sin.sin_family = AF_INET;
+   sin.sin_port = 0;
+   sin.sin_addr.s_addr = ip4->dst_addr;
+
+   if (sendto(ctx->sock, buf, len, 0, (struct sockaddr *)&sin, 
sizeof(sin)) < 0)
+   node_err("kernel_tx", "Unable to send packets: %s\n", 
strerror(errno));
+   }
+}
+
+static uint16_t
+kernel_tx_node_process(struct rte_graph *graph __rte_unused, struct rte_node 
*node, void **objs,
+uint16_t nb_objs)
+{
+   struct rte_mbuf **pkts = (struct rte_mbuf **)objs;
+   uint16_t obj_left = nb_objs;
+
+#define PREFETCH_CNT 4
+
+   while (obj_left >= 12) {
+   /* Prefetch next-next mbufs */
+   rte_prefetch0(pkts[8]);
+   rte_prefetch0(pkts[9]);
+   rte_prefetch0(pkts[10]);
+   rte_prefetch0(pkts[11]);
+
+   /* Prefetch next mbuf data */
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *, 
pkts[4]->l2_len));
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *, 
pkts[5]->l2_len));
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *, 
pkts[6]->l2_len));
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *, 
pkts[7]->l2_len));
+
+   kernel_tx_process_mbuf(node, pkts, PREFETCH_CNT);
+
+   obj_left -= PREFETCH_CNT;
+   pkts += PREFETCH_CNT;
+   }
+
+   while (obj_left > 0) {
+   kernel_tx_process_mbuf(node, pkts, 1);
+
+   obj_left--;
+   pkts++;
+   }
+
+   rte_pktmbuf_free_bulk((struct rte_mbuf **)objs, nb_objs);
+
+   return nb_objs;
+}
+
+static int
+kernel_tx_node_init(const struct rte_graph *graph __rte_unused, struct 
rte_node *node)
+{
+   kernel_tx_node_ctx_t *ctx = (kernel_tx_node_ctx_t *)node->ctx;
+
+   ctx->sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+   if (ctx->sock < 0)
+   node_err("kernel_tx", "Unable to open RAW socket\n");
+
+   return 0;
+}
+
+static void
+kernel_tx_node_fini(const struct rte_graph *graph __rte_unused, struct 
rte_node *node)
+{
+   kernel_tx_node_ctx_t *ctx = (kernel_tx_node_ctx_t *)node->ctx;
+
+   if (ctx->sock >= 0) {
+   close(ctx->sock);
+   ctx->sock = -1;
+   }
+}
+
+static struct rte_node_register kernel_tx_node_base = {
+   .process = kernel_tx_node_process,
+   .name = "kernel_tx",
+
+   .init = kernel_tx_node_init,
+   .fini = kernel_tx_node_fini,
+
+   .nb_edges = 0,
+};
+
+struct rte_node_register *
+kernel_tx_node_get(void)
+{
+   return &kernel_tx_node_base;
+}
+
+RTE_NODE_REGISTER(kernel_tx_node_base);
diff --git a/lib/node/kernel_tx_priv.h b/lib/node/ker
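
Outside the graph framework, the transmit path above reduces to a couple of
socket calls. A trimmed standalone sketch follows; with IPPROTO_RAW the
kernel expects the caller to provide the full IPv4 header, which is exactly
what the node forwards:

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>

/* Send one pre-built IPv4 packet (header included) toward the kernel. */
static ssize_t
raw_ip_send(int sock, const void *pkt, size_t len, uint32_t dst_addr)
{
        struct sockaddr_in sin;

        memset(&sin, 0, sizeof(sin));
        sin.sin_family = AF_INET;
        sin.sin_addr.s_addr = dst_addr; /* already in network byte order */

        return sendto(sock, pkt, len, 0,
                      (const struct sockaddr *)&sin, sizeof(sin));
}

/* Socket creation, as in kernel_tx_node_init():
 *   int sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 */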

[PATCH v3 2/3] node/kernel_rx: support receiving packets from kernel

2023-06-02 Thread Vamsi Attunuru
Adds a node to receive packets from kernel over
a raw socket.

Signed-off-by: Vamsi Attunuru 
---
 doc/guides/prog_guide/graph_lib.rst |   8 +
 lib/node/kernel_rx.c| 276 
 lib/node/kernel_rx_priv.h   |  48 +
 lib/node/meson.build|   1 +
 4 files changed, 333 insertions(+)

diff --git a/doc/guides/prog_guide/graph_lib.rst 
b/doc/guides/prog_guide/graph_lib.rst
index fa22b014f3..4b05bcee3c 100644
--- a/doc/guides/prog_guide/graph_lib.rst
+++ b/doc/guides/prog_guide/graph_lib.rst
@@ -401,3 +401,11 @@ raw socket interface to transmit the packets; it places the packet's
 destination IP address in a sockaddr_in structure and uses the ``sendto``
 function to send data on the raw socket. After sending the burst of packets
 to the kernel, this node frees up the packet buffers.
+
+kernel_rx
+~
+This node is a source node which receives packets from the kernel and forwards
+them to any of the intermediate nodes. It uses a raw socket interface to
+receive packets from the kernel and the ``poll`` function to wait for
+``POLLIN`` events on the socket fd; it reads the packets from the raw socket
+into the stream buffer and calls ``rte_node_next_stream_move()`` when packets
+have been received.
diff --git a/lib/node/kernel_rx.c b/lib/node/kernel_rx.c
new file mode 100644
index 00..2dba7c8cc7
--- /dev/null
+++ b/lib/node/kernel_rx.c
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell International Ltd.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "ethdev_rx_priv.h"
+#include "kernel_rx_priv.h"
+#include "node_private.h"
+
+static inline struct rte_mbuf *
+alloc_rx_mbuf(kernel_rx_node_ctx_t *ctx)
+{
+   kernel_rx_info_t *rx = ctx->recv_info;
+
+   if (rx->idx >= rx->cnt) {
+   uint16_t cnt;
+
+   rx->idx = 0;
+   rx->cnt = 0;
+
+   cnt = rte_pktmbuf_alloc_bulk(ctx->pktmbuf_pool, rx->rx_bufs, 
KERN_RX_CACHE_COUNT);
+   if (cnt <= 0)
+   return NULL;
+
+   rx->cnt = cnt;
+   }
+
+   return rx->rx_bufs[rx->idx++];
+}
+
+static inline void
+mbuf_update(struct rte_mbuf **mbufs, uint16_t nb_pkts)
+{
+   struct rte_net_hdr_lens hdr_lens;
+   struct rte_mbuf *m;
+   int i;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = mbufs[i];
+
+   m->packet_type = rte_net_get_ptype(m, &hdr_lens, 
RTE_PTYPE_ALL_MASK);
+
+   m->ol_flags = 0;
+   m->tx_offload = 0;
+
+   m->l2_len = hdr_lens.l2_len;
+   m->l3_len = hdr_lens.l3_len;
+   m->l4_len = hdr_lens.l4_len;
+   }
+}
+
+static uint16_t
+recv_pkt_parse(void **objs, uint16_t nb_pkts)
+{
+   uint16_t pkts_left = nb_pkts;
+   struct rte_mbuf **pkts;
+   int i;
+
+   pkts = (struct rte_mbuf **)objs;
+
+   if (pkts_left >= 4) {
+   for (i = 0; i < 4; i++)
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[i], void *));
+   }
+
+   while (pkts_left >= 12) {
+   /* Prefetch next-next mbufs */
+   rte_prefetch0(pkts[8]);
+   rte_prefetch0(pkts[9]);
+   rte_prefetch0(pkts[10]);
+   rte_prefetch0(pkts[11]);
+
+   /* Prefetch next mbuf data */
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[4], void *));
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[5], void *));
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[6], void *));
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[7], void *));
+
+   /* Extract ptype of mbufs */
+   mbuf_update(pkts, 4);
+
+   pkts += 4;
+   pkts_left -= 4;
+   }
+
+   if (pkts_left > 0)
+   mbuf_update(pkts, pkts_left);
+
+   return nb_pkts;
+}
+
+static uint16_t
+kernel_rx_node_do(struct rte_graph *graph, struct rte_node *node, 
kernel_rx_node_ctx_t *ctx)
+{
+   kernel_rx_info_t *rx;
+   uint16_t next_index;
+   int fd;
+
+   rx = ctx->recv_info;
+   next_index = rx->node_next;
+
+   fd = rx->sock;
+   if (fd > 0) {
+   struct rte_mbuf **mbufs;
+   uint16_t len = 0, count = 0;
+   int nb_cnt, i;
+
+   nb_cnt = (node->size >= RTE_GRAPH_BURST_SIZE) ? 
RTE_GRAPH_BURST_SIZE : node->size;
+
+   mbufs = (struct rte_mbuf **)node->objs;
+   for (i = 0; i < nb_cnt; i++) {
+   struct rte_mbuf *m = alloc_rx_mbuf(ctx);
+
+   if (!m)
+   break;
+
+   len = read(fd, rte_pktmbuf_mtod(m, char *), 
rte_pktmbuf_tailroom(m));
+
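
The receive loop above follows the classic poll-then-read pattern on the raw
socket. A trimmed sketch of that loop, with the mbuf handling replaced by a
flat buffer array:

#include <poll.h>
#include <unistd.h>

/* Drain up to 'max' packets from a raw socket without blocking. */
static int
raw_sock_drain(int fd, char bufs[][2048], int max)
{
        struct pollfd pfd = { .fd = fd, .events = POLLIN };
        int n = 0;

        while (n < max && poll(&pfd, 1, 0) > 0 && (pfd.revents & POLLIN)) {
                ssize_t len = read(fd, bufs[n], 2048);

                if (len <= 0)
                        break;
                n++;
        }

        return n;
}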

[PATCH v3 3/3] node/ethdev_rx: remove hardcoded node next details

2023-06-02 Thread Vamsi Attunuru
For the ethdev_rx node, node_next details can be populated
at node cloning time; they are then assigned to the node
context structure during node initialization.

Patch removes the overriding of node_next details in node
init().

Signed-off-by: Vamsi Attunuru 
---
 lib/node/ethdev_ctrl.c | 1 +
 lib/node/ethdev_rx.c   | 3 ---
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/node/ethdev_ctrl.c b/lib/node/ethdev_ctrl.c
index 37df0431b8..496f791cee 100644
--- a/lib/node/ethdev_ctrl.c
+++ b/lib/node/ethdev_ctrl.c
@@ -82,6 +82,7 @@ rte_node_eth_config(struct rte_node_ethdev_config *conf, 
uint16_t nb_confs,
memset(elem, 0, sizeof(ethdev_rx_node_elem_t));
elem->ctx.port_id = port_id;
elem->ctx.queue_id = j;
+   elem->ctx.cls_next = ETHDEV_RX_NEXT_PKT_CLS;
elem->nid = id;
elem->next = rx_node_data->head;
rx_node_data->head = elem;
diff --git a/lib/node/ethdev_rx.c b/lib/node/ethdev_rx.c
index a19237b42f..d131034991 100644
--- a/lib/node/ethdev_rx.c
+++ b/lib/node/ethdev_rx.c
@@ -194,8 +194,6 @@ ethdev_rx_node_init(const struct rte_graph *graph, struct 
rte_node *node)
 
RTE_VERIFY(elem != NULL);
 
-   ctx->cls_next = ETHDEV_RX_NEXT_PKT_CLS;
-
/* Check and setup ptype */
return ethdev_ptype_setup(ctx->port_id, ctx->queue_id);
 }
@@ -215,7 +213,6 @@ static struct rte_node_register ethdev_rx_node_base = {
 
.nb_edges = ETHDEV_RX_NEXT_MAX,
.next_nodes = {
-   /* Default pkt classification node */
[ETHDEV_RX_NEXT_PKT_CLS] = "pkt_cls",
[ETHDEV_RX_NEXT_IP4_LOOKUP] = "ip4_lookup",
},
-- 
2.25.1



[PATCH 1/1] net/octeon_ep: use devarg to enable ISM accesses

2024-02-23 Thread Vamsi Attunuru
Adds a devarg option to enable/disable ISM memory accesses
for reading packet count details. This option is disabled
by default, as ISM memory accesses affect the throughput of
larger packets.

Signed-off-by: Vamsi Attunuru 
---
 doc/guides/nics/octeon_ep.rst | 12 
 drivers/net/octeon_ep/cnxk_ep_rx.h| 42 +-
 drivers/net/octeon_ep/cnxk_ep_tx.c| 42 ++
 drivers/net/octeon_ep/cnxk_ep_vf.c|  4 +--
 drivers/net/octeon_ep/otx2_ep_vf.c|  4 +--
 drivers/net/octeon_ep/otx_ep_common.h | 14 +++--
 drivers/net/octeon_ep/otx_ep_ethdev.c | 43 +++
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 15 ++
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  2 ++
 9 files changed, 153 insertions(+), 25 deletions(-)

diff --git a/doc/guides/nics/octeon_ep.rst b/doc/guides/nics/octeon_ep.rst
index b5040aeee2..befa0a4097 100644
--- a/doc/guides/nics/octeon_ep.rst
+++ b/doc/guides/nics/octeon_ep.rst
@@ -11,6 +11,18 @@ and **Cavium OCTEON** families of adapters in SR-IOV context.
 More information can be found at `Marvell Official Website
 
<https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-liquidio-III-solutions-brief.pdf>`_.
 
+Runtime Config Options
+--
+
+- ``Rx&Tx ISM memory accesses enable`` (default ``0``)
+
+   PMD supports two modes for checking the Rx & Tx packet count: it may read
+   the count directly from hardware registers, or it may read it from ISM
+   memory. The mode may be selected at runtime using the ``ism_enable``
+   ``devargs`` parameter.
+
+   For example::
+
+  -a 0002:02:00.0,ism_enable=1
 
 Prerequisites
 -
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.h 
b/drivers/net/octeon_ep/cnxk_ep_rx.h
index 61263e651e..ecf95cd961 100644
--- a/drivers/net/octeon_ep/cnxk_ep_rx.h
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.h
@@ -88,8 +88,9 @@ cnxk_ep_rx_refill(struct otx_ep_droq *droq)
 }
 
 static inline uint32_t
-cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+cnxk_ep_check_rx_ism_mem(void *rx_queue)
 {
+   struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
uint32_t new_pkts;
uint32_t val;
 
@@ -98,8 +99,9 @@ cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
 * number of PCIe writes.
 */
val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
-   new_pkts = val - droq->pkts_sent_ism_prev;
-   droq->pkts_sent_ism_prev = val;
+
+   new_pkts = val - droq->pkts_sent_prev;
+   droq->pkts_sent_prev = val;
 
if (val > RTE_BIT32(31)) {
/* Only subtract the packet count in the HW counter
@@ -113,11 +115,34 @@ cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
rte_mb();
}
-
-   droq->pkts_sent_ism_prev = 0;
+   droq->pkts_sent_prev = 0;
}
+
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
-   droq->pkts_pending += new_pkts;
+
+   return new_pkts;
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkt_reg(void *rx_queue)
+{
+   struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+   uint32_t new_pkts;
+   uint32_t val;
+
+   val = rte_read32(droq->pkts_sent_reg);
+
+   new_pkts = val - droq->pkts_sent_prev;
+   droq->pkts_sent_prev = val;
+
+   if (val > RTE_BIT32(31)) {
+   /* Only subtract the packet count in the HW counter
+* when count above halfway to saturation.
+*/
+   rte_write64((uint64_t)val, droq->pkts_sent_reg);
+   rte_mb();
+   droq->pkts_sent_prev = 0;
+   }
 
return new_pkts;
 }
@@ -125,8 +150,11 @@ cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
 static inline int16_t __rte_hot
 cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
 {
+   const otx_ep_check_pkt_count_t cnxk_rx_pkt_count[2] = { cnxk_ep_check_rx_pkt_reg,
+   cnxk_ep_check_rx_ism_mem};
+
if (droq->pkts_pending < nb_pkts)
-   cnxk_ep_check_rx_pkts(droq);
+   droq->pkts_pending += cnxk_rx_pkt_count[droq->ism_ena](droq);
 
return RTE_MIN(nb_pkts, droq->pkts_pending);
 }
diff --git a/drivers/net/octeon_ep/cnxk_ep_tx.c 
b/drivers/net/octeon_ep/cnxk_ep_tx.c
index 9f11a2f317..98c0a861c3 100644
--- a/drivers/net/octeon_ep/cnxk_ep_tx.c
+++ b/drivers/net/octeon_ep/cnxk_ep_tx.c
@@ -5,9 +5,10 @@
 #include "cnxk_ep_vf.h"
 #include "otx_ep_rxtx.h"
 
-static uint32_t
-cnxk_vf_update_read_index(struct otx_ep_instr_queue *iq)
+static inline uint32_t
+cnxk_ep_check_tx_ism_mem(void *tx_queue)
 {
+   struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
uin
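
The fast-path change above boils down to dispatching the packet-count read
through a two-entry function-pointer table indexed by the devarg, so the hot
loop carries no mode branch. A stripped-down, self-contained sketch of the
pattern (types and stub readers are simplified stand-ins, not driver code):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t (*check_pkt_count_t)(void *queue);

    /* Stubs standing in for the register and ISM readers. */
    static uint32_t check_count_reg(void *q) { return *(uint32_t *)q; }
    static uint32_t check_count_ism(void *q) { return *(uint32_t *)q + 1; }

    int main(void)
    {
        /* Index 0 -> register read, 1 -> ISM read; fixed per queue. */
        const check_pkt_count_t tbl[2] = { check_count_reg, check_count_ism };
        uint32_t hw_counter = 41;
        uint8_t ism_ena = 1; /* would come from the ism_enable devarg */

        printf("%u\n", tbl[ism_ena](&hw_counter));
        return 0;
    }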

[PATCH v2 1/1] net/octeon_ep: use devarg to enable ISM accesses

2024-02-26 Thread Vamsi Attunuru
Adds a devarg option to enable/disable ISM memory accesses
for reading packet count details. This option is disabled
by default, as ISM memory accesses affect the throughput of
larger packets.

Signed-off-by: Vamsi Attunuru 
---
V2 changes:
- Updated release notes and documentation
- Added missing devarg string

 doc/guides/nics/octeon_ep.rst  | 14 
 doc/guides/rel_notes/release_24_03.rst |  2 ++
 drivers/net/octeon_ep/cnxk_ep_rx.h | 42 
 drivers/net/octeon_ep/cnxk_ep_tx.c | 42 
 drivers/net/octeon_ep/cnxk_ep_vf.c |  4 +--
 drivers/net/octeon_ep/otx2_ep_vf.c |  4 +--
 drivers/net/octeon_ep/otx_ep_common.h  | 14 ++--
 drivers/net/octeon_ep/otx_ep_ethdev.c  | 45 ++
 drivers/net/octeon_ep/otx_ep_rxtx.c| 15 +
 drivers/net/octeon_ep/otx_ep_rxtx.h|  2 ++
 10 files changed, 159 insertions(+), 25 deletions(-)

diff --git a/doc/guides/nics/octeon_ep.rst b/doc/guides/nics/octeon_ep.rst
index b5040aeee2..db2ff0e7c1 100644
--- a/doc/guides/nics/octeon_ep.rst
+++ b/doc/guides/nics/octeon_ep.rst
@@ -11,6 +11,20 @@ and **Cavium OCTEON** families of adapters in SR-IOV context.
 More information can be found at `Marvell Official Website
 
<https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-liquidio-III-solutions-brief.pdf>`_.
 
+Runtime Config Options
+--
+
+- ``Rx&Tx ISM memory accesses enable`` (default ``0``)
+
+   The PMD supports two modes for checking the Rx & Tx packet count: it may read
+   the count directly from hardware registers, or from ISM memory. The mode can
+   be selected at runtime using the ``ism_enable`` ``devargs`` parameter.
+   Performance is higher for larger packets with the default value
+   (``ism_enable=0``). Use this runtime option to enable ISM memory accesses
+   for better performance with smaller packets.
+
+   For example::
+
+  -a 0002:02:00.0,ism_enable=1
 
 Prerequisites
 -
diff --git a/doc/guides/rel_notes/release_24_03.rst 
b/doc/guides/rel_notes/release_24_03.rst
index 4b3e26ebf6..74ec43ca64 100644
--- a/doc/guides/rel_notes/release_24_03.rst
+++ b/doc/guides/rel_notes/release_24_03.rst
@@ -121,6 +121,8 @@ New Features
   * Added optimized SSE Rx routines.
   * Added optimized AVX2 Rx routines.
   * Added optimized NEON Rx routines.
+  * Added a devarg to enable/disable ISM memory accesses, which gives better
+performance for smaller packet sizes when enabled.
 
 * **Updated NVIDIA mlx5 driver.**
 
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.h 
b/drivers/net/octeon_ep/cnxk_ep_rx.h
index 61263e651e..ecf95cd961 100644
--- a/drivers/net/octeon_ep/cnxk_ep_rx.h
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.h
@@ -88,8 +88,9 @@ cnxk_ep_rx_refill(struct otx_ep_droq *droq)
 }
 
 static inline uint32_t
-cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
+cnxk_ep_check_rx_ism_mem(void *rx_queue)
 {
+   struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
uint32_t new_pkts;
uint32_t val;
 
@@ -98,8 +99,9 @@ cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
 * number of PCIe writes.
 */
val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
-   new_pkts = val - droq->pkts_sent_ism_prev;
-   droq->pkts_sent_ism_prev = val;
+
+   new_pkts = val - droq->pkts_sent_prev;
+   droq->pkts_sent_prev = val;
 
if (val > RTE_BIT32(31)) {
/* Only subtract the packet count in the HW counter
@@ -113,11 +115,34 @@ cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
rte_mb();
}
-
-   droq->pkts_sent_ism_prev = 0;
+   droq->pkts_sent_prev = 0;
}
+
rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
-   droq->pkts_pending += new_pkts;
+
+   return new_pkts;
+}
+
+static inline uint32_t
+cnxk_ep_check_rx_pkt_reg(void *rx_queue)
+{
+   struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+   uint32_t new_pkts;
+   uint32_t val;
+
+   val = rte_read32(droq->pkts_sent_reg);
+
+   new_pkts = val - droq->pkts_sent_prev;
+   droq->pkts_sent_prev = val;
+
+   if (val > RTE_BIT32(31)) {
+   /* Only subtract the packet count in the HW counter
+* when count above halfway to saturation.
+*/
+   rte_write64((uint64_t)val, droq->pkts_sent_reg);
+   rte_mb();
+   droq->pkts_sent_prev = 0;
+   }
 
return new_pkts;
 }
@@ -125,8 +150,11 @@ cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
 static inline int16_t __rte_hot
 cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
 {
+   const otx_ep_che
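
A note on the counter arithmetic above: ``val - prev`` on 32-bit unsigned
values yields the correct delta even across wrap-around, and writing the
count back once it passes half range keeps the hardware counter away from
saturation. A small self-contained illustration of the delta step (a
simplification, not the driver's exact reset sequence):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t
    counter_delta(uint32_t cur, uint32_t *prev)
    {
        /* Unsigned subtraction is exact modulo 2^32, so wrap is safe:
         * 5 - 0xFFFFFFFE == 7.
         */
        uint32_t delta = cur - *prev;

        *prev = cur;
        /* Mirror of the driver's behaviour: once past half range, the
         * HW count is written back and the soft copy cleared.
         */
        if (cur > (UINT32_C(1) << 31))
            *prev = 0;
        return delta;
    }

    int main(void)
    {
        uint32_t prev = 0xFFFFFFFEu;

        printf("%u\n", counter_delta(5, &prev)); /* prints 7 */
        return 0;
    }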

[PATCH 0/4] app: introduce testgraph application

2023-04-20 Thread Vamsi Attunuru
This patch series introduces the testgraph application, which verifies the
graph architecture. It provides an infra to verify the graph & node libraries
and scale the test coverage by adding newer configurations to exercise
various graph topologies & graph-walk models required by DPDK
applications.

Also this series adds two new nodes (punt_kernel & kernel_recv) to the
node library.

Vamsi Attunuru (4):
  node: add pkt punt to kernel node
  node: add a node to receive pkts from kernel
  node: remove hardcoded node next details
  app: add testgraph application

 app/meson.build |1 +
 app/test-graph/cmdline.c|  212 +
 app/test-graph/cmdline_graph.c  |  297 ++
 app/test-graph/cmdline_graph.h  |   19 +
 app/test-graph/meson.build  |   17 +
 app/test-graph/parameters.c |  157 
 app/test-graph/testgraph.c  | 1309 +++
 app/test-graph/testgraph.h  |   92 ++
 doc/guides/prog_guide/graph_lib.rst |   17 +
 doc/guides/tools/index.rst  |1 +
 doc/guides/tools/testgraph.rst  |  131 +++
 lib/node/ethdev_rx.c|2 -
 lib/node/kernel_recv.c  |  277 ++
 lib/node/kernel_recv_priv.h |   74 ++
 lib/node/meson.build|2 +
 lib/node/punt_kernel.c  |  125 +++
 lib/node/punt_kernel_priv.h |   36 +
 17 files changed, 2767 insertions(+), 2 deletions(-)
 create mode 100644 app/test-graph/cmdline.c
 create mode 100644 app/test-graph/cmdline_graph.c
 create mode 100644 app/test-graph/cmdline_graph.h
 create mode 100644 app/test-graph/meson.build
 create mode 100644 app/test-graph/parameters.c
 create mode 100644 app/test-graph/testgraph.c
 create mode 100644 app/test-graph/testgraph.h
 create mode 100644 doc/guides/tools/testgraph.rst
 create mode 100644 lib/node/kernel_recv.c
 create mode 100644 lib/node/kernel_recv_priv.h
 create mode 100644 lib/node/punt_kernel.c
 create mode 100644 lib/node/punt_kernel_priv.h

-- 
2.25.1



[PATCH 1/4] node: add pkt punt to kernel node

2023-04-20 Thread Vamsi Attunuru
Patch adds a node to punt the packets to kernel over
a raw socket.
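
For context, a minimal standalone sketch of the raw-socket send the node
performs. With ``IPPROTO_RAW`` the buffer must start with a complete IPv4
header, and running it requires CAP_NET_RAW; the address and sizes here are
illustrative only.

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        struct sockaddr_in sin = { 0 };
        struct ip ip = { 0 };
        int sock;

        sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
        if (sock < 0) {
            perror("socket"); /* needs CAP_NET_RAW */
            return 1;
        }

        /* Minimal IPv4 header; the kernel fills checksum, id and source. */
        ip.ip_v = 4;
        ip.ip_hl = 5;
        ip.ip_ttl = 64;
        ip.ip_p = IPPROTO_RAW;
        ip.ip_len = htons(sizeof(ip));
        inet_pton(AF_INET, "192.0.2.1", &ip.ip_dst);

        sin.sin_family = AF_INET;
        sin.sin_addr = ip.ip_dst; /* kernel routes on this address */

        if (sendto(sock, &ip, sizeof(ip), 0,
                   (struct sockaddr *)&sin, sizeof(sin)) < 0)
            perror("sendto");

        close(sock);
        return 0;
    }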

Signed-off-by: Vamsi Attunuru 
---
 doc/guides/prog_guide/graph_lib.rst |  10 +++
 lib/node/meson.build|   1 +
 lib/node/punt_kernel.c  | 125 
 lib/node/punt_kernel_priv.h |  36 
 4 files changed, 172 insertions(+)

diff --git a/doc/guides/prog_guide/graph_lib.rst 
b/doc/guides/prog_guide/graph_lib.rst
index 1cfdc86433..b3b5b14827 100644
--- a/doc/guides/prog_guide/graph_lib.rst
+++ b/doc/guides/prog_guide/graph_lib.rst
@@ -392,3 +392,13 @@ null
 
 This node ignores the set of objects passed to it and reports that all are
 processed.
+
+punt_kernel
+~~~~~~~~~~~
+This node punts packets to the kernel using a raw socket interface. For each
+packet, the node fills a ``sockaddr_in`` address structure with the packet's
+destination IP address and uses the ``sendto`` function to send the data
+on the raw socket.
+
+After sending the burst of packets to the kernel, this node redirects the same
+objects to the pkt_drop node to free up the packet buffers.
diff --git a/lib/node/meson.build b/lib/node/meson.build
index dbdf673c86..48c2da73f7 100644
--- a/lib/node/meson.build
+++ b/lib/node/meson.build
@@ -17,6 +17,7 @@ sources = files(
 'null.c',
 'pkt_cls.c',
 'pkt_drop.c',
+'punt_kernel.c',
 )
 headers = files('rte_node_ip4_api.h', 'rte_node_eth_api.h')
 # Strict-aliasing rules are violated by uint8_t[] to context size casts.
diff --git a/lib/node/punt_kernel.c b/lib/node/punt_kernel.c
new file mode 100644
index 00..e5dd15b759
--- /dev/null
+++ b/lib/node/punt_kernel.c
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell International Ltd.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "node_private.h"
+#include "punt_kernel_priv.h"
+
+static __rte_always_inline void
+punt_kernel_process_mbuf(struct rte_node *node, struct rte_mbuf **mbufs, 
uint16_t cnt)
+{
+   punt_kernel_node_ctx_t *ctx = (punt_kernel_node_ctx_t *)node->ctx;
+   struct sockaddr_in sin = {0};
+   struct rte_ipv4_hdr *ip4;
+   size_t len;
+   char *buf;
+   int i;
+
+   for (i = 0; i < cnt; i++) {
+   ip4 = rte_pktmbuf_mtod(mbufs[i], struct rte_ipv4_hdr *);
+   len = rte_pktmbuf_data_len(mbufs[i]);
+   buf = (char *)ip4;
+
+   sin.sin_family = AF_INET;
+   sin.sin_port = 0;
+   sin.sin_addr.s_addr = ip4->dst_addr;
+
+   if (sendto(ctx->sock, buf, len, 0, (struct sockaddr *)&sin, 
sizeof(sin)) < 0)
+   node_err("punt_kernel", "Unable to send packets: %s\n", 
strerror(errno));
+   }
+}
+
+static uint16_t
+punt_kernel_node_process(struct rte_graph *graph __rte_unused, struct rte_node 
*node, void **objs,
+uint16_t nb_objs)
+{
+   struct rte_mbuf **pkts = (struct rte_mbuf **)objs;
+   uint16_t obj_left = nb_objs;
+
+#define PREFETCH_CNT 4
+
+   while (obj_left >= 12) {
+   /* Prefetch next-next mbufs */
+   rte_prefetch0(pkts[8]);
+   rte_prefetch0(pkts[9]);
+   rte_prefetch0(pkts[10]);
+   rte_prefetch0(pkts[11]);
+
+   /* Prefetch next mbuf data */
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *, 
pkts[4]->l2_len));
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *, 
pkts[5]->l2_len));
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *, 
pkts[6]->l2_len));
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *, 
pkts[7]->l2_len));
+
+   punt_kernel_process_mbuf(node, pkts, PREFETCH_CNT);
+
+   obj_left -= PREFETCH_CNT;
+   pkts += PREFETCH_CNT;
+   }
+
+   while (obj_left > 0) {
+   punt_kernel_process_mbuf(node, pkts, 1);
+
+   obj_left--;
+   pkts++;
+   }
+
+   rte_node_next_stream_move(graph, node, PUNT_KERNEL_NEXT_PKT_DROP);
+
+   return nb_objs;
+}
+
+static int
+punt_kernel_node_init(const struct rte_graph *graph __rte_unused, struct 
rte_node *node)
+{
+   punt_kernel_node_ctx_t *ctx = (punt_kernel_node_ctx_t *)node->ctx;
+
+   ctx->sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+   if (ctx->sock < 0)
+   node_err("punt_kernel", "Unable to open RAW socket\n");
+
+   return 0;
+}
+
+static void
+punt_kernel_node_fini(const struct rte_graph *graph __rte_unused, struct 
rte_node *node)
+{
+   punt_kernel_node_ctx_t *ctx = (punt_kernel_node_ctx_t *)node->ctx;
+
+   if (ctx->sock >= 0) {
+   close(ctx->sock);
+  

[PATCH 2/4] node: add a node to receive pkts from kernel

2023-04-20 Thread Vamsi Attunuru
Patch adds a node to receive packets from kernel
over a raw socket.
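
For context, the core receive step reduces to a poll-then-read on the raw
socket fd. A minimal self-contained sketch of that step, reading into a
plain buffer rather than an mbuf:

    #include <poll.h>
    #include <stddef.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Wait up to timeout_ms for data on fd, then read at most cap bytes
     * into buf. Returns bytes read, 0 on timeout/no data, -1 on error.
     */
    static ssize_t
    poll_and_read(int fd, void *buf, size_t cap, int timeout_ms)
    {
        struct pollfd pfd = { .fd = fd, .events = POLLIN };
        int rc;

        rc = poll(&pfd, 1, timeout_ms);
        if (rc <= 0)
            return rc; /* 0: timeout, <0: poll error */
        if (!(pfd.revents & POLLIN))
            return 0;
        return read(fd, buf, cap);
    }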

Signed-off-by: Vamsi Attunuru 
---
 doc/guides/prog_guide/graph_lib.rst |   7 +
 lib/node/kernel_recv.c  | 277 
 lib/node/kernel_recv_priv.h |  74 
 lib/node/meson.build|   1 +
 4 files changed, 359 insertions(+)

diff --git a/doc/guides/prog_guide/graph_lib.rst 
b/doc/guides/prog_guide/graph_lib.rst
index b3b5b14827..1057f16de8 100644
--- a/doc/guides/prog_guide/graph_lib.rst
+++ b/doc/guides/prog_guide/graph_lib.rst
@@ -402,3 +402,10 @@ on the raw socket.
 
 After sending the burst of packets to the kernel, this node redirects the same
 objects to the pkt_drop node to free up the packet buffers.
+
+kernel_recv
+~~~~~~~~~~~
+This node receives packets from the kernel over a raw socket interface. It uses
+the ``poll`` function to wait for ``POLLIN`` events on the socket fd, reads the
+packets from the raw socket into the stream buffer, and calls
+``rte_node_next_stream_move()`` when packets have been received.
diff --git a/lib/node/kernel_recv.c b/lib/node/kernel_recv.c
new file mode 100644
index 00..361dcc3b5f
--- /dev/null
+++ b/lib/node/kernel_recv.c
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell International Ltd.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "ethdev_rx_priv.h"
+#include "kernel_recv_priv.h"
+#include "node_private.h"
+
+static struct kernel_recv_node_main kernel_recv_main;
+
+static inline struct rte_mbuf *
+alloc_rx_mbuf(kernel_recv_node_ctx_t *ctx)
+{
+   kernel_recv_info_t *rx = ctx->recv_info;
+
+   if (rx->idx >= rx->cnt) {
+   uint16_t cnt;
+
+   rx->idx = 0;
+   rx->cnt = 0;
+
+   cnt = rte_pktmbuf_alloc_bulk(ctx->pktmbuf_pool, rx->rx_bufs, 
KERN_RECV_CACHE_COUNT);
+   if (cnt <= 0)
+   return NULL;
+
+   rx->cnt = cnt;
+   }
+
+   return rx->rx_bufs[rx->idx++];
+}
+
+static inline void
+mbuf_update(struct rte_mbuf **mbufs, uint16_t nb_pkts)
+{
+   struct rte_net_hdr_lens hdr_lens;
+   struct rte_mbuf *m;
+   int i;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = mbufs[i];
+
+   m->packet_type = rte_net_get_ptype(m, &hdr_lens, 
RTE_PTYPE_ALL_MASK);
+
+   m->ol_flags = 0;
+   m->tx_offload = 0;
+
+   m->l2_len = hdr_lens.l2_len;
+   m->l3_len = hdr_lens.l3_len;
+   m->l4_len = hdr_lens.l4_len;
+   }
+}
+
+static uint16_t
+recv_pkt_parse(void **objs, uint16_t nb_pkts)
+{
+   uint16_t pkts_left = nb_pkts;
+   struct rte_mbuf **pkts;
+   int i;
+
+   pkts = (struct rte_mbuf **)objs;
+
+   if (pkts_left >= 4) {
+   for (i = 0; i < 4; i++)
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[i], void *));
+   }
+
+   while (pkts_left >= 12) {
+   /* Prefetch next-next mbufs */
+   rte_prefetch0(pkts[8]);
+   rte_prefetch0(pkts[9]);
+   rte_prefetch0(pkts[10]);
+   rte_prefetch0(pkts[11]);
+
+   /* Prefetch next mbuf data */
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[4], void *));
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[5], void *));
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[6], void *));
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[7], void *));
+
+   /* Extract ptype of mbufs */
+   mbuf_update(pkts, 4);
+
+   pkts += 4;
+   pkts_left -= 4;
+   }
+
+   if (pkts_left > 0)
+   mbuf_update(pkts, pkts_left);
+
+   return nb_pkts;
+}
+
+static uint16_t
+kernel_recv_node_do(struct rte_graph *graph, struct rte_node *node, 
kernel_recv_node_ctx_t *ctx)
+{
+   kernel_recv_info_t *rx;
+   uint16_t next_index;
+   int fd;
+
+   rx = ctx->recv_info;
+   next_index = rx->cls_next;
+
+   fd = rx->sock;
+   if (fd > 0) {
+   struct rte_mbuf **mbufs;
+   uint16_t len = 0, count = 0;
+   int nb_cnt, i;
+
+   nb_cnt = (node->size >= RTE_GRAPH_BURST_SIZE) ? 
RTE_GRAPH_BURST_SIZE : node->size;
+
+   mbufs = (struct rte_mbuf **)node->objs;
+   for (i = 0; i < nb_cnt; i++) {
+   struct rte_mbuf *m = alloc_rx_mbuf(ctx);
+
+   if (!m)
+   break;
+
+   len = read(fd, rte_pktmbuf_mtod(m, char *), 
rte_pktmbuf_tailroom(m));
+   if (len == 0 || len == 0xFFFF) {
+   rte_pktmbuf_free(m);
+ 

[PATCH 3/4] node: remove hardcoded node next details

2023-04-20 Thread Vamsi Attunuru
For the ethdev_rx node, the node_next details can be populated
at node cloning time and then assigned to the node context
structure during node initialization.

Patch removes overriding of the node_next details in node
init().
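
The pattern here is to record per-node defaults (such as the classification
next edge) in the element prepared at clone time, and have init() only copy
that element into the node context. A simplified sketch with illustrative
types, not the library's exact structures:

    #include <stdint.h>

    struct rx_elem {            /* prepared once, at node clone time */
        uint16_t port_id;
        uint16_t queue_id;
        uint16_t cls_next;      /* default next edge lives here now */
    };

    struct node_ctx {           /* per-node runtime context */
        uint16_t port_id;
        uint16_t queue_id;
        uint16_t cls_next;
    };

    static void
    elem_setup(struct rx_elem *e, uint16_t port, uint16_t queue,
               uint16_t default_next)
    {
        e->port_id = port;
        e->queue_id = queue;
        e->cls_next = default_next; /* instead of hard-coding in init() */
    }

    static void
    node_init_from_elem(struct node_ctx *ctx, const struct rx_elem *e)
    {
        ctx->port_id = e->port_id;
        ctx->queue_id = e->queue_id;
        ctx->cls_next = e->cls_next;
    }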

Signed-off-by: Vamsi Attunuru 
---
 lib/node/ethdev_rx.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lib/node/ethdev_rx.c b/lib/node/ethdev_rx.c
index a19237b42f..85816c489c 100644
--- a/lib/node/ethdev_rx.c
+++ b/lib/node/ethdev_rx.c
@@ -194,8 +194,6 @@ ethdev_rx_node_init(const struct rte_graph *graph, struct 
rte_node *node)
 
RTE_VERIFY(elem != NULL);
 
-   ctx->cls_next = ETHDEV_RX_NEXT_PKT_CLS;
-
/* Check and setup ptype */
return ethdev_ptype_setup(ctx->port_id, ctx->queue_id);
 }
-- 
2.25.1



[PATCH 4/4] app: add testgraph application

2023-04-20 Thread Vamsi Attunuru
Patch adds test-graph application to validate graph
and node libraries.

Signed-off-by: Vamsi Attunuru 
---
 app/meson.build|1 +
 app/test-graph/cmdline.c   |  212 ++
 app/test-graph/cmdline_graph.c |  297 
 app/test-graph/cmdline_graph.h |   19 +
 app/test-graph/meson.build |   17 +
 app/test-graph/parameters.c|  157 
 app/test-graph/testgraph.c | 1309 
 app/test-graph/testgraph.h |   92 +++
 doc/guides/tools/index.rst |1 +
 doc/guides/tools/testgraph.rst |  131 
 10 files changed, 2236 insertions(+)

diff --git a/app/meson.build b/app/meson.build
index 74d2420f67..6c7b24e604 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -22,6 +22,7 @@ apps = [
 'test-eventdev',
 'test-fib',
 'test-flow-perf',
+'test-graph',
 'test-gpudev',
 'test-mldev',
 'test-pipeline',
diff --git a/app/test-graph/cmdline.c b/app/test-graph/cmdline.c
new file mode 100644
index 00..a07a8a24f9
--- /dev/null
+++ b/app/test-graph/cmdline.c
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell International Ltd.
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "cmdline_graph.h"
+#include "testgraph.h"
+
+static struct cmdline *testgraph_cl;
+static cmdline_parse_ctx_t *main_ctx;
+
+/* *** Help command with introduction. *** */
+struct cmd_help_brief_result {
+   cmdline_fixed_string_t help;
+};
+
+static void
+cmd_help_brief_parsed(__rte_unused void *parsed_result, struct cmdline *cl, 
__rte_unused void *data)
+{
+   cmdline_printf(cl,
+  "\n"
+  "Help is available for the following sections:\n\n"
+  "help control: Start and stop 
graph walk.\n"
+  "help display: Displaying port, 
stats and config "
+  "information.\n"
+  "help config : Configuration 
information.\n"
+  "help all: All of the above 
sections.\n\n");
+}
+
+static cmdline_parse_token_string_t cmd_help_brief_help =
+   TOKEN_STRING_INITIALIZER(struct cmd_help_brief_result, help, "help");
+
+static cmdline_parse_inst_t cmd_help_brief = {
+   .f = cmd_help_brief_parsed,
+   .data = NULL,
+   .help_str = "help: Show help",
+   .tokens = {
+   (void *)&cmd_help_brief_help,
+   NULL,
+   },
+};
+
+/* *** Help command with help sections. *** */
+struct cmd_help_long_result {
+   cmdline_fixed_string_t help;
+   cmdline_fixed_string_t section;
+};
+
+static void
+cmd_help_long_parsed(void *parsed_result, struct cmdline *cl, __rte_unused 
void *data)
+{
+   int show_all = 0;
+   struct cmd_help_long_result *res = parsed_result;
+
+   if (!strcmp(res->section, "all"))
+   show_all = 1;
+
+   if (show_all || !strcmp(res->section, "control")) {
+
+   cmdline_printf(cl, "\n"
+  "Control forwarding:\n"
+  "---\n\n"
+
+  "start graph_walk\n"
+  " Start graph_walk on worker threads.\n\n"
+
+  "stop graph_walk\n"
+  " Stop worker threads from running 
graph_walk.\n\n"
+
+  "quit\n"
+  "Quit to prompt.\n\n");
+   }
+
+   if (show_all || !strcmp(res->section, "display")) {
+
+   cmdline_printf(cl,
+  "\n"
+  "Display:\n"
+  "\n\n"
+
+  "show node_list\n"
+  " Display the list of supported nodes.\n\n"
+
+  "show graph_stats\n"
+  " Display the node statistics of graph 
cluster.\n\n");
+   }
+
+   if (show_all || !strcmp(res->section, "config")) {
+   cmdline_printf(cl, "\n"
+  "Configuration:\n"
+  "--\n"
+  "set lcore_config 
(port_id0,rxq0,lcore_idX),..."
+  "...

[PATCH v2 0/4] app: introduce testgraph application

2023-04-25 Thread Vamsi Attunuru
This patch series introduces the testgraph application, which verifies the
graph architecture. It provides an infra to verify the graph & node libraries
and scale the test coverage by adding newer configurations to exercise
various graph topologies & graph-walk models required by DPDK
applications.

Also this series adds two new nodes (punt_kernel & kernel_recv) to the
node library.

V2:
* Handle error checks in testgraph application
* Extend supported test node patterns
* Fix warnings

Vamsi Attunuru (4):
  node: add pkt punt to kernel node
  node: add a node to receive pkts from kernel
  node: remove hardcoded node next details
  app: add testgraph application

 app/meson.build |1 +
 app/test-graph/cmdline.c|  211 
 app/test-graph/cmdline_graph.c  |  294 ++
 app/test-graph/cmdline_graph.h  |   19 +
 app/test-graph/meson.build  |   14 +
 app/test-graph/parameters.c |  157 +++
 app/test-graph/testgraph.c  | 1426 +++
 app/test-graph/testgraph.h  |   91 ++
 doc/guides/prog_guide/graph_lib.rst |   17 +
 doc/guides/tools/index.rst  |1 +
 doc/guides/tools/testgraph.rst  |  131 +++
 lib/node/ethdev_rx.c|2 -
 lib/node/kernel_recv.c  |  276 ++
 lib/node/kernel_recv_priv.h |   74 ++
 lib/node/meson.build|2 +
 lib/node/punt_kernel.c  |  125 +++
 lib/node/punt_kernel_priv.h |   36 +
 17 files changed, 2875 insertions(+), 2 deletions(-)
 create mode 100644 app/test-graph/cmdline.c
 create mode 100644 app/test-graph/cmdline_graph.c
 create mode 100644 app/test-graph/cmdline_graph.h
 create mode 100644 app/test-graph/meson.build
 create mode 100644 app/test-graph/parameters.c
 create mode 100644 app/test-graph/testgraph.c
 create mode 100644 app/test-graph/testgraph.h
 create mode 100644 doc/guides/tools/testgraph.rst
 create mode 100644 lib/node/kernel_recv.c
 create mode 100644 lib/node/kernel_recv_priv.h
 create mode 100644 lib/node/punt_kernel.c
 create mode 100644 lib/node/punt_kernel_priv.h

-- 
2.25.1



[PATCH v2 1/4] node: add pkt punt to kernel node

2023-04-25 Thread Vamsi Attunuru
Patch adds a node to punt the packets to kernel over
a raw socket.

Signed-off-by: Vamsi Attunuru 
---
 doc/guides/prog_guide/graph_lib.rst |  10 +++
 lib/node/meson.build|   1 +
 lib/node/punt_kernel.c  | 125 
 lib/node/punt_kernel_priv.h |  36 
 4 files changed, 172 insertions(+)

diff --git a/doc/guides/prog_guide/graph_lib.rst 
b/doc/guides/prog_guide/graph_lib.rst
index 1cfdc86433..b3b5b14827 100644
--- a/doc/guides/prog_guide/graph_lib.rst
+++ b/doc/guides/prog_guide/graph_lib.rst
@@ -392,3 +392,13 @@ null
 
 This node ignores the set of objects passed to it and reports that all are
 processed.
+
+punt_kernel
+~~~~~~~~~~~
+This node punts packets to the kernel using a raw socket interface. For each
+packet, the node fills a ``sockaddr_in`` address structure with the packet's
+destination IP address and uses the ``sendto`` function to send the data
+on the raw socket.
+
+After sending the burst of packets to the kernel, this node redirects the same
+objects to the pkt_drop node to free up the packet buffers.
diff --git a/lib/node/meson.build b/lib/node/meson.build
index dbdf673c86..48c2da73f7 100644
--- a/lib/node/meson.build
+++ b/lib/node/meson.build
@@ -17,6 +17,7 @@ sources = files(
 'null.c',
 'pkt_cls.c',
 'pkt_drop.c',
+'punt_kernel.c',
 )
 headers = files('rte_node_ip4_api.h', 'rte_node_eth_api.h')
 # Strict-aliasing rules are violated by uint8_t[] to context size casts.
diff --git a/lib/node/punt_kernel.c b/lib/node/punt_kernel.c
new file mode 100644
index 00..e5dd15b759
--- /dev/null
+++ b/lib/node/punt_kernel.c
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell International Ltd.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "node_private.h"
+#include "punt_kernel_priv.h"
+
+static __rte_always_inline void
+punt_kernel_process_mbuf(struct rte_node *node, struct rte_mbuf **mbufs, 
uint16_t cnt)
+{
+   punt_kernel_node_ctx_t *ctx = (punt_kernel_node_ctx_t *)node->ctx;
+   struct sockaddr_in sin = {0};
+   struct rte_ipv4_hdr *ip4;
+   size_t len;
+   char *buf;
+   int i;
+
+   for (i = 0; i < cnt; i++) {
+   ip4 = rte_pktmbuf_mtod(mbufs[i], struct rte_ipv4_hdr *);
+   len = rte_pktmbuf_data_len(mbufs[i]);
+   buf = (char *)ip4;
+
+   sin.sin_family = AF_INET;
+   sin.sin_port = 0;
+   sin.sin_addr.s_addr = ip4->dst_addr;
+
+   if (sendto(ctx->sock, buf, len, 0, (struct sockaddr *)&sin, 
sizeof(sin)) < 0)
+   node_err("punt_kernel", "Unable to send packets: %s\n", 
strerror(errno));
+   }
+}
+
+static uint16_t
+punt_kernel_node_process(struct rte_graph *graph __rte_unused, struct rte_node 
*node, void **objs,
+uint16_t nb_objs)
+{
+   struct rte_mbuf **pkts = (struct rte_mbuf **)objs;
+   uint16_t obj_left = nb_objs;
+
+#define PREFETCH_CNT 4
+
+   while (obj_left >= 12) {
+   /* Prefetch next-next mbufs */
+   rte_prefetch0(pkts[8]);
+   rte_prefetch0(pkts[9]);
+   rte_prefetch0(pkts[10]);
+   rte_prefetch0(pkts[11]);
+
+   /* Prefetch next mbuf data */
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *, 
pkts[4]->l2_len));
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *, 
pkts[5]->l2_len));
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *, 
pkts[6]->l2_len));
+   rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *, 
pkts[7]->l2_len));
+
+   punt_kernel_process_mbuf(node, pkts, PREFETCH_CNT);
+
+   obj_left -= PREFETCH_CNT;
+   pkts += PREFETCH_CNT;
+   }
+
+   while (obj_left > 0) {
+   punt_kernel_process_mbuf(node, pkts, 1);
+
+   obj_left--;
+   pkts++;
+   }
+
+   rte_node_next_stream_move(graph, node, PUNT_KERNEL_NEXT_PKT_DROP);
+
+   return nb_objs;
+}
+
+static int
+punt_kernel_node_init(const struct rte_graph *graph __rte_unused, struct 
rte_node *node)
+{
+   punt_kernel_node_ctx_t *ctx = (punt_kernel_node_ctx_t *)node->ctx;
+
+   ctx->sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+   if (ctx->sock < 0)
+   node_err("punt_kernel", "Unable to open RAW socket\n");
+
+   return 0;
+}
+
+static void
+punt_kernel_node_fini(const struct rte_graph *graph __rte_unused, struct 
rte_node *node)
+{
+   punt_kernel_node_ctx_t *ctx = (punt_kernel_node_ctx_t *)node->ctx;
+
+   if (ctx->sock >= 0) {
+   close(ctx->sock);
+  

[PATCH v2 2/4] node: add a node to receive pkts from kernel

2023-04-25 Thread Vamsi Attunuru
Patch adds a node to receive packets from kernel
over a raw socket.

Signed-off-by: Vamsi Attunuru 
---
 doc/guides/prog_guide/graph_lib.rst |   7 +
 lib/node/kernel_recv.c  | 276 
 lib/node/kernel_recv_priv.h |  74 
 lib/node/meson.build|   1 +
 4 files changed, 358 insertions(+)

diff --git a/doc/guides/prog_guide/graph_lib.rst 
b/doc/guides/prog_guide/graph_lib.rst
index b3b5b14827..1057f16de8 100644
--- a/doc/guides/prog_guide/graph_lib.rst
+++ b/doc/guides/prog_guide/graph_lib.rst
@@ -402,3 +402,10 @@ on the raw socket.
 
 After sending the burst of packets to the kernel, this node redirects the same
 objects to the pkt_drop node to free up the packet buffers.
+
+kernel_recv
+~~~~~~~~~~~
+This node receives packets from the kernel over a raw socket interface. It uses
+the ``poll`` function to wait for ``POLLIN`` events on the socket fd, reads the
+packets from the raw socket into the stream buffer, and calls
+``rte_node_next_stream_move()`` when packets have been received.
diff --git a/lib/node/kernel_recv.c b/lib/node/kernel_recv.c
new file mode 100644
index 00..9b28ad76d3
--- /dev/null
+++ b/lib/node/kernel_recv.c
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell International Ltd.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "ethdev_rx_priv.h"
+#include "kernel_recv_priv.h"
+#include "node_private.h"
+
+static struct kernel_recv_node_main kernel_recv_main;
+
+static inline struct rte_mbuf *
+alloc_rx_mbuf(kernel_recv_node_ctx_t *ctx)
+{
+   kernel_recv_info_t *rx = ctx->recv_info;
+
+   if (rx->idx >= rx->cnt) {
+   uint16_t cnt;
+
+   rx->idx = 0;
+   rx->cnt = 0;
+
+   cnt = rte_pktmbuf_alloc_bulk(ctx->pktmbuf_pool, rx->rx_bufs, 
KERN_RECV_CACHE_COUNT);
+   if (cnt <= 0)
+   return NULL;
+
+   rx->cnt = cnt;
+   }
+
+   return rx->rx_bufs[rx->idx++];
+}
+
+static inline void
+mbuf_update(struct rte_mbuf **mbufs, uint16_t nb_pkts)
+{
+   struct rte_net_hdr_lens hdr_lens;
+   struct rte_mbuf *m;
+   int i;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = mbufs[i];
+
+   m->packet_type = rte_net_get_ptype(m, &hdr_lens, 
RTE_PTYPE_ALL_MASK);
+
+   m->ol_flags = 0;
+   m->tx_offload = 0;
+
+   m->l2_len = hdr_lens.l2_len;
+   m->l3_len = hdr_lens.l3_len;
+   m->l4_len = hdr_lens.l4_len;
+   }
+}
+
+static uint16_t
+recv_pkt_parse(void **objs, uint16_t nb_pkts)
+{
+   uint16_t pkts_left = nb_pkts;
+   struct rte_mbuf **pkts;
+   int i;
+
+   pkts = (struct rte_mbuf **)objs;
+
+   if (pkts_left >= 4) {
+   for (i = 0; i < 4; i++)
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[i], void *));
+   }
+
+   while (pkts_left >= 12) {
+   /* Prefetch next-next mbufs */
+   rte_prefetch0(pkts[8]);
+   rte_prefetch0(pkts[9]);
+   rte_prefetch0(pkts[10]);
+   rte_prefetch0(pkts[11]);
+
+   /* Prefetch next mbuf data */
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[4], void *));
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[5], void *));
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[6], void *));
+   rte_prefetch0(rte_pktmbuf_mtod(pkts[7], void *));
+
+   /* Extract ptype of mbufs */
+   mbuf_update(pkts, 4);
+
+   pkts += 4;
+   pkts_left -= 4;
+   }
+
+   if (pkts_left > 0)
+   mbuf_update(pkts, pkts_left);
+
+   return nb_pkts;
+}
+
+static uint16_t
+kernel_recv_node_do(struct rte_graph *graph, struct rte_node *node, 
kernel_recv_node_ctx_t *ctx)
+{
+   kernel_recv_info_t *rx;
+   uint16_t next_index;
+   int fd;
+
+   rx = ctx->recv_info;
+   next_index = rx->cls_next;
+
+   fd = rx->sock;
+   if (fd > 0) {
+   struct rte_mbuf **mbufs;
+   uint16_t len = 0, count = 0;
+   int nb_cnt, i;
+
+   nb_cnt = (node->size >= RTE_GRAPH_BURST_SIZE) ? 
RTE_GRAPH_BURST_SIZE : node->size;
+
+   mbufs = (struct rte_mbuf **)node->objs;
+   for (i = 0; i < nb_cnt; i++) {
+   struct rte_mbuf *m = alloc_rx_mbuf(ctx);
+
+   if (!m)
+   break;
+
+   len = read(fd, rte_pktmbuf_mtod(m, char *), 
rte_pktmbuf_tailroom(m));
+   if (len == 0 || len == 0xFFFF) {
+   rte_pktmbuf_free(m);
+ 

[PATCH v2 3/4] node: remove hardcoded node next details

2023-04-25 Thread Vamsi Attunuru
For the ethdev_rx node, the node_next details can be populated
at node cloning time and then assigned to the node context
structure during node initialization.

Patch removes overriding of the node_next details in node
init().

Signed-off-by: Vamsi Attunuru 
---
 lib/node/ethdev_rx.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lib/node/ethdev_rx.c b/lib/node/ethdev_rx.c
index a19237b42f..85816c489c 100644
--- a/lib/node/ethdev_rx.c
+++ b/lib/node/ethdev_rx.c
@@ -194,8 +194,6 @@ ethdev_rx_node_init(const struct rte_graph *graph, struct 
rte_node *node)
 
RTE_VERIFY(elem != NULL);
 
-   ctx->cls_next = ETHDEV_RX_NEXT_PKT_CLS;
-
/* Check and setup ptype */
return ethdev_ptype_setup(ctx->port_id, ctx->queue_id);
 }
-- 
2.25.1



[PATCH v2 4/4] app: add testgraph application

2023-04-25 Thread Vamsi Attunuru
Patch adds test-graph application to validate graph
and node libraries.

Signed-off-by: Vamsi Attunuru 
---
 app/meson.build|1 +
 app/test-graph/cmdline.c   |  211 +
 app/test-graph/cmdline_graph.c |  294 +++
 app/test-graph/cmdline_graph.h |   19 +
 app/test-graph/meson.build |   14 +
 app/test-graph/parameters.c|  157 
 app/test-graph/testgraph.c | 1426 
 app/test-graph/testgraph.h |   91 ++
 doc/guides/tools/index.rst |1 +
 doc/guides/tools/testgraph.rst |  131 +++
 10 files changed, 2345 insertions(+)

diff --git a/app/meson.build b/app/meson.build
index 74d2420f67..6c7b24e604 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -22,6 +22,7 @@ apps = [
 'test-eventdev',
 'test-fib',
 'test-flow-perf',
+'test-graph',
 'test-gpudev',
 'test-mldev',
 'test-pipeline',
diff --git a/app/test-graph/cmdline.c b/app/test-graph/cmdline.c
new file mode 100644
index 00..d9474d827a
--- /dev/null
+++ b/app/test-graph/cmdline.c
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell International Ltd.
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "cmdline_graph.h"
+#include "testgraph.h"
+
+static struct cmdline *testgraph_cl;
+static cmdline_parse_ctx_t *main_ctx;
+
+/* *** Help command with introduction. *** */
+struct cmd_help_brief_result {
+   cmdline_fixed_string_t help;
+};
+
+static void
+cmd_help_brief_parsed(__rte_unused void *parsed_result, struct cmdline *cl, 
__rte_unused void *data)
+{
+   cmdline_printf(cl,
+  "\n"
+  "Help is available for the following sections:\n\n"
+  "help control: Start and stop 
graph walk.\n"
+  "help display: Displaying port, 
stats and config "
+  "information.\n"
+  "help config : Configuration 
information.\n"
+  "help all: All of the above 
sections.\n\n");
+}
+
+static cmdline_parse_token_string_t cmd_help_brief_help =
+   TOKEN_STRING_INITIALIZER(struct cmd_help_brief_result, help, "help");
+
+static cmdline_parse_inst_t cmd_help_brief = {
+   .f = cmd_help_brief_parsed,
+   .data = NULL,
+   .help_str = "help: Show help",
+   .tokens = {
+   (void *)&cmd_help_brief_help,
+   NULL,
+   },
+};
+
+/* *** Help command with help sections. *** */
+struct cmd_help_long_result {
+   cmdline_fixed_string_t help;
+   cmdline_fixed_string_t section;
+};
+
+static void
+cmd_help_long_parsed(void *parsed_result, struct cmdline *cl, __rte_unused 
void *data)
+{
+   int show_all = 0;
+   struct cmd_help_long_result *res = parsed_result;
+
+   if (!strcmp(res->section, "all"))
+   show_all = 1;
+
+   if (show_all || !strcmp(res->section, "control")) {
+
+   cmdline_printf(cl, "\n"
+  "Control forwarding:\n"
+  "---\n\n"
+
+  "start graph_walk\n"
+  " Start graph_walk on worker threads.\n\n"
+
+  "stop graph_walk\n"
+  " Stop worker threads from running 
graph_walk.\n\n"
+
+  "quit\n"
+  "Quit to prompt.\n\n");
+   }
+
+   if (show_all || !strcmp(res->section, "display")) {
+
+   cmdline_printf(cl,
+  "\n"
+  "Display:\n"
+  "\n\n"
+
+  "show node_list\n"
+  " Display the list of supported nodes.\n\n"
+
+  "show graph_stats\n"
+  " Display the node statistics of graph 
cluster.\n\n");
+   }
+
+   if (show_all || !strcmp(res->section, "config")) {
+   cmdline_printf(cl, "\n"
+  "Configuration:\n"
+  "--\n"
+  "set lcore_config 
(port_id0,rxq0,lcore_idX),..."
+  "...

[PATCH] net/octeon_ep: rework transmit routine

2023-06-05 Thread Vamsi Attunuru
Patch optimizes the transmit path for multi-seg packets
by pre-allocating the gather list memory instead of
allocating it in the fast path.
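
The idea is to pay the gather-list allocation cost once per descriptor at
queue setup so the transmit fast path only fills in pointers. A condensed
sketch with simplified names and plain calloc standing in for the DPDK
allocator:

    #include <stdint.h>
    #include <stdlib.h>

    #define NB_DESC 1024
    #define MAX_SG  4   /* gather entries preallocated per descriptor */

    struct sg_entry { uint16_t size[4]; uint64_t ptr[4]; };
    struct req_entry { struct sg_entry *sg; };

    /* Queue setup: allocate every descriptor's gather list up front. */
    static int
    iq_prealloc_sg(struct req_entry *req_list)
    {
        int i;

        for (i = 0; i < NB_DESC; i++) {
            req_list[i].sg = calloc(MAX_SG, sizeof(struct sg_entry));
            if (req_list[i].sg == NULL)
                return -1;
        }
        return 0;
    }

    /* Fast path: no allocation, just fill the preallocated entries. */
    static void
    iq_fill_seg(struct req_entry *req, void *addr, uint16_t len)
    {
        req->sg[0].ptr[0] = (uint64_t)(uintptr_t)addr;
        req->sg[0].size[0] = len;
    }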

Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/otx_ep_common.h |  66 +++
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 251 ++
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  11 --
 3 files changed, 130 insertions(+), 198 deletions(-)

diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index 0305079af9..42aa065a3a 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -37,6 +37,8 @@
 #define OTX_EP_NORESP_OHSM_SEND (4)
 #define OTX_EP_NORESP_LAST  (4)
 #define OTX_EP_PCI_RING_ALIGN   65536
+#define OTX_EP_MAX_SG_LISTS 4
+#define OTX_EP_NUM_SG_PTRS 4
 #define SDP_PKIND 40
 #define SDP_OTX2_PKIND 57
 #define SDP_OTX2_PKIND_FS0 0
@@ -135,9 +137,40 @@ typedef union otx_ep_instr_ih {
} s;
 } otx_ep_instr_ih_t;
 
+struct otx_ep_sg_entry {
+   /** The first 64 bit gives the size of data in each dptr. */
+   union {
+   uint16_t size[OTX_EP_NUM_SG_PTRS];
+   uint64_t size64;
+   } u;
+
+   /** The 4 dptr pointers for this entry. */
+   uint64_t ptr[OTX_EP_NUM_SG_PTRS];
+};
+
+#define OTX_EP_SG_ENTRY_SIZE   (sizeof(struct otx_ep_sg_entry))
+
+/** Structure of a node in list of gather components maintained by
+ *  driver for each network device.
+ */
+struct otx_ep_gather {
+   /** number of gather entries. */
+   int num_sg;
+
+   /** Gather component that can accommodate max sized fragment list
+*  received from the IP layer.
+*/
+   struct otx_ep_sg_entry *sg;
+};
+
+struct otx_ep_buf_free_info {
+   struct rte_mbuf *mbuf;
+   struct otx_ep_gather g;
+};
+
 /* OTX_EP IQ request list */
 struct otx_ep_instr_list {
-   void *buf;
+   struct otx_ep_buf_free_info finfo;
uint32_t reqtype;
 };
 #define OTX_EP_IQREQ_LIST_SIZE (sizeof(struct otx_ep_instr_list))
@@ -516,37 +549,6 @@ int otx_ep_setup_oqs(struct otx_ep_device *otx_ep, int 
oq_no, int num_descs,
 unsigned int socket_id);
 int otx_ep_delete_oqs(struct otx_ep_device *otx_ep, uint32_t oq_no);
 
-struct otx_ep_sg_entry {
-   /** The first 64 bit gives the size of data in each dptr. */
-   union {
-   uint16_t size[4];
-   uint64_t size64;
-   } u;
-
-   /** The 4 dptr pointers for this entry. */
-   uint64_t ptr[4];
-};
-
-#define OTX_EP_SG_ENTRY_SIZE   (sizeof(struct otx_ep_sg_entry))
-
-/** Structure of a node in list of gather components maintained by
- *  driver for each network device.
- */
-struct otx_ep_gather {
-   /** number of gather entries. */
-   int num_sg;
-
-   /** Gather component that can accommodate max sized fragment list
-*  received from the IP layer.
-*/
-   struct otx_ep_sg_entry *sg;
-};
-
-struct otx_ep_buf_free_info {
-   struct rte_mbuf *mbuf;
-   struct otx_ep_gather g;
-};
-
 #define OTX_EP_MAX_PKT_SZ 65498U
 #define OTX_EP_MAX_MAC_ADDRS 1
 #define OTX_EP_SG_ALIGN 8
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c 
b/drivers/net/octeon_ep/otx_ep_rxtx.c
index ca968f6fe7..b37fc8109f 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -49,6 +49,7 @@ int
 otx_ep_delete_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no)
 {
struct otx_ep_instr_queue *iq;
+   uint32_t i;
 
iq = otx_ep->instr_queue[iq_no];
if (iq == NULL) {
@@ -56,7 +57,12 @@ otx_ep_delete_iqs(struct otx_ep_device *otx_ep, uint32_t 
iq_no)
return -EINVAL;
}
 
-   rte_free(iq->req_list);
+   if (iq->req_list) {
+   for (i = 0; i < iq->nb_desc; i++)
+   rte_free(iq->req_list[i].finfo.g.sg);
+   rte_free(iq->req_list);
+   }
+
iq->req_list = NULL;
 
if (iq->iq_mz) {
@@ -81,7 +87,8 @@ otx_ep_init_instr_queue(struct otx_ep_device *otx_ep, int 
iq_no, int num_descs,
 {
const struct otx_ep_config *conf;
struct otx_ep_instr_queue *iq;
-   uint32_t q_size;
+   struct otx_ep_sg_entry *sg;
+   uint32_t i, q_size;
int ret;
 
conf = otx_ep->conf;
@@ -121,6 +128,18 @@ otx_ep_init_instr_queue(struct otx_ep_device *otx_ep, int 
iq_no, int num_descs,
goto iq_init_fail;
}
 
+   for (i = 0; i < iq->nb_desc; i++) {
+   sg = rte_zmalloc_socket("sg_entry", (OTX_EP_MAX_SG_LISTS * 
OTX_EP_SG_ENTRY_SIZE),
+   OTX_EP_SG_ALIGN, rte_socket_id());
+   if (sg == NULL) {
+   otx_ep_err("IQ[%d] sg_entries alloc failed\n", iq_no);
+   goto iq_init_fail;
+   }
+
+   iq->req_list[i].finfo.g.num_sg = OTX_EP_MAX_SG_LISTS;
+   iq->req_l

[PATCH] maintainers: update for OCTEON EP and CNXK DMA

2023-06-13 Thread Vamsi Attunuru
Replace the maintainers for OcteonTX EP and cnxk DPI DMA drivers.

Signed-off-by: Vamsi Attunuru 
---
 MAINTAINERS | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 906b31f97c..827733a1aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -845,9 +845,7 @@ F: doc/guides/nics/mvneta.rst
 F: doc/guides/nics/features/mvneta.ini
 
 Marvell OCTEON TX EP - endpoint
-M: Radha Mohan Chintakuntla 
-M: Veerasenareddy Burru 
-M: Sathesh Edara 
+M: Vamsi Attunuru 
 T: git://dpdk.org/next/dpdk-next-net-mrvl
 F: drivers/net/octeon_ep/
 F: doc/guides/nics/features/octeon_ep.ini
@@ -1250,8 +1248,7 @@ F: drivers/dma/hisilicon/
 F: doc/guides/dmadevs/hisilicon.rst
 
 Marvell CNXK DPI DMA
-M: Radha Mohan Chintakuntla 
-M: Veerasenareddy Burru 
+M: Vamsi Attunuru 
 F: drivers/dma/cnxk/
 F: doc/guides/dmadevs/cnxk.rst
 
-- 
2.25.1



[PATCH] net/octeon_ep: support backward compatibility

2023-06-22 Thread Vamsi Attunuru
From: Sathesh Edara 

Add backward compatibility support between VF
and PF mailbox messages.
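
The compatibility scheme pairs a negotiated mailbox version with a per-command
minimum-version table, so a newer VF never sends a command an older PF cannot
parse. A condensed sketch of that gate; command names and version numbers here
are illustrative, not the driver's actual opcodes:

    #include <errno.h>
    #include <stdint.h>

    enum { MBOX_V1 = 1, MBOX_V2 = 2 };
    enum { CMD_GET_MAC, CMD_SET_MTU, CMD_DEV_REMOVE, CMD_MAX };

    /* Minimum mailbox version that understands each command. */
    static const uint32_t cmd_min_ver[CMD_MAX] = {
        [CMD_GET_MAC]    = MBOX_V1,
        [CMD_SET_MTU]    = MBOX_V1,
        [CMD_DEV_REMOVE] = MBOX_V1,
    };

    static int
    mbox_cmd_allowed(uint32_t opcode, uint32_t neg_ver)
    {
        if (opcode >= CMD_MAX || cmd_min_ver[opcode] > neg_ver)
            return -EOPNOTSUPP;
        return 0;
    }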

Signed-off-by: Sathesh Edara 
Signed-off-by: Vamsi Attunuru 
---
 drivers/net/octeon_ep/otx_ep_common.h |  3 +++
 drivers/net/octeon_ep/otx_ep_ethdev.c |  6 +
 drivers/net/octeon_ep/otx_ep_mbox.c   | 38 ++-
 drivers/net/octeon_ep/otx_ep_mbox.h   | 11 ++--
 4 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index 42aa065a3a..c150cbe619 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -538,6 +538,9 @@ struct otx_ep_device {
 
/* Mailbox receive message length */
int32_t mbox_rcv_message_len;
+
+   /* Negotiated Mbox version */
+   uint32_t mbox_neg_ver;
 };
 
 int otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no,
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c 
b/drivers/net/octeon_ep/otx_ep_ethdev.c
index a9868909f8..57b965ad06 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -666,6 +666,12 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
otx_epvf->port_id = eth_dev->data->port_id;
eth_dev->dev_ops = &otx_ep_eth_dev_ops;
rte_spinlock_init(&otx_epvf->mbox_lock);
+
+   /*
+* Initialize the negotiated Mbox version to the base version of the
+* VF Mbox, so that a legacy PF works with the latest VF.
+*/
+   otx_epvf->mbox_neg_ver = OTX_EP_MBOX_VERSION_V1;
eth_dev->data->mac_addrs = rte_zmalloc("otx_ep", RTE_ETHER_ADDR_LEN, 0);
if (eth_dev->data->mac_addrs == NULL) {
otx_ep_err("MAC addresses memory allocation failed\n");
diff --git a/drivers/net/octeon_ep/otx_ep_mbox.c 
b/drivers/net/octeon_ep/otx_ep_mbox.c
index 1ad36e14c8..4118645dc7 100644
--- a/drivers/net/octeon_ep/otx_ep_mbox.c
+++ b/drivers/net/octeon_ep/otx_ep_mbox.c
@@ -12,6 +12,14 @@
 #include "cnxk_ep_vf.h"
 #include "otx_ep_mbox.h"
 
+/*
+ * When a new command is implemented, the below table should be updated
+ * with the new command and its version info.
+ */
+static uint32_t otx_ep_cmd_versions[OTX_EP_MBOX_CMD_MAX] = {
+   [0 ... OTX_EP_MBOX_CMD_DEV_REMOVE] = OTX_EP_MBOX_VERSION_V1
+};
+
 static int
 __otx_ep_send_mbox_cmd(struct otx_ep_device *otx_ep,
   union otx_ep_mbox_word cmd,
@@ -56,6 +64,12 @@ otx_ep_send_mbox_cmd(struct otx_ep_device *otx_ep,
int ret;
 
rte_spinlock_lock(&otx_ep->mbox_lock);
+   if (otx_ep_cmd_versions[cmd.s.opcode] > otx_ep->mbox_neg_ver) {
+   otx_ep_dbg("CMD:%d not supported in Version:%d\n", cmd.s.opcode,
+   otx_ep->mbox_neg_ver);
+   rte_spinlock_unlock(&otx_ep->mbox_lock);
+   return -EOPNOTSUPP;
+   }
ret = __otx_ep_send_mbox_cmd(otx_ep, cmd, rsp);
rte_spinlock_unlock(&otx_ep->mbox_lock);
return ret;
@@ -284,15 +298,27 @@ int otx_ep_mbox_version_check(struct rte_eth_dev *eth_dev)
 
cmd.u64 = 0;
cmd.s_version.opcode = OTX_EP_MBOX_CMD_VERSION;
-   cmd.s_version.version = OTX_EP_MBOX_VERSION;
+   cmd.s_version.version = OTX_EP_MBOX_VERSION_CURRENT;
ret = otx_ep_send_mbox_cmd(otx_ep, cmd, &rsp);
-   if (!ret)
-   return 0;
-   if (ret == OTX_EP_MBOX_CMD_STATUS_NACK) {
-   otx_ep_err("VF Mbox version:%u is not compatible with PF\n",
+
+   /*
+* The VF receives a NACK, or a version of zero, only if the PF driver
+* is running an old version of the mailbox. In this case the VF mailbox
+* version falls back to the base mailbox version OTX_EP_MBOX_VERSION_V1.
+* The default VF mbox_neg_ver is set to OTX_EP_MBOX_VERSION_V1 during
+* PMD initialization.
+*/
+   if (ret == OTX_EP_MBOX_CMD_STATUS_NACK || rsp.s_version.version == 0) {
+   otx_ep_dbg("VF Mbox version fallback to base version from:%u\n",
(uint32_t)cmd.s_version.version);
+   return 0;
}
-   return ret;
+   otx_ep->mbox_neg_ver = (uint32_t)rsp.s_version.version;
+   otx_ep_dbg("VF Mbox version:%u Negotiated VF version with PF:%u\n",
+   (uint32_t)cmd.s_version.version,
+   (uint32_t)rsp.s_version.version);
+   return 0;
 }
 
 int otx_ep_mbox_send_dev_exit(struct rte_eth_dev *eth_dev)
diff --git a/drivers/net/octeon_ep/otx_ep_mbox.h 
b/drivers/net/octeon_ep/otx_ep_mbox.h
index 9df3c53edd..a3fc15cca7 100644
--- a/drivers/net/octeon_ep/otx_ep_mbox.h
+++ b/drivers/net/octeon_ep/otx_ep_mbox.h
@@ -5,8 +5,15 @@
 #ifndef _OTX_EP_MBOX_H_
 #define _OTX_EP_MBOX_H_
 
+/*
+ * When a new command is implemented, VF Mbox version should be b

[PATCH 1/1] net/cnxk: add outb soft expiry notification support

2022-02-23 Thread Vamsi Attunuru
Patch implements a soft expiry notification mechanism in the outbound
path by creating the required number of ring buffers and a common poll
thread which polls for soft expiry events enqueued by the microcode.
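
The ring layout below keeps head and tail positions together with generation
bits in one 64-bit word, which lets the poll thread compute how many entries
are pending from a single read. One plausible reading of that scheme, as a
sketch only, not the microcode's exact semantics:

    #include <stdint.h>

    #define RING_MAX_ENTRY 65536

    union ring_head {
        uint64_t u64;
        struct {
            uint16_t tail_pos;
            uint16_t tail_gen;
            uint16_t head_pos;
            uint16_t head_gen;
        } s;
    };

    /* Entries the poll thread still has to consume; the generation
     * bits distinguish "empty" from "wrapped" when positions match.
     */
    static uint32_t
    ring_pending(union ring_head h)
    {
        if (h.s.head_gen == h.s.tail_gen)
            return (uint16_t)(h.s.tail_pos - h.s.head_pos);
        return RING_MAX_ENTRY - h.s.head_pos + h.s.tail_pos;
    }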

Signed-off-by: Vamsi Attunuru 
---
 drivers/common/cnxk/roc_idev.c|  15 +++
 drivers/common/cnxk/roc_idev.h|   2 +
 drivers/common/cnxk/roc_ie_ot.h   |  25 +++-
 drivers/common/cnxk/roc_nix_inl.c |  71 +++
 drivers/common/cnxk/roc_nix_inl.h |  20 ++-
 drivers/common/cnxk/roc_nix_inl_dev.c | 146 +-
 drivers/common/cnxk/roc_nix_inl_dev_irq.c |   2 +-
 drivers/common/cnxk/roc_nix_inl_priv.h|  13 ++
 drivers/common/cnxk/roc_nix_priv.h|   2 +
 drivers/common/cnxk/roc_platform.h|   3 +
 drivers/common/cnxk/version.map   |   2 +
 drivers/net/cnxk/cn10k_ethdev_sec.c   |  52 +++-
 drivers/net/cnxk/cnxk_ethdev.c|   4 +
 drivers/net/cnxk/cnxk_ethdev_sec.c|   1 +
 14 files changed, 350 insertions(+), 8 deletions(-)

diff --git a/drivers/common/cnxk/roc_idev.c b/drivers/common/cnxk/roc_idev.c
index b1f38fb5fc..a08c7ce8fd 100644
--- a/drivers/common/cnxk/roc_idev.c
+++ b/drivers/common/cnxk/roc_idev.c
@@ -185,6 +185,21 @@ roc_idev_cpt_get(void)
return NULL;
 }
 
+uint64_t *
+roc_nix_inl_outb_ring_base_get(struct roc_nix *roc_nix)
+{
+   struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+   struct idev_cfg *idev = idev_get_cfg();
+   struct nix_inl_dev *inl_dev;
+
+   if (!idev || !idev->nix_inl_dev)
+   return NULL;
+
+   inl_dev = idev->nix_inl_dev;
+
+   return (uint64_t *)&inl_dev->sa_soft_exp_ring[nix->outb_se_ring_base];
+}
+
 void
 roc_idev_cpt_set(struct roc_cpt *cpt)
 {
diff --git a/drivers/common/cnxk/roc_idev.h b/drivers/common/cnxk/roc_idev.h
index 16793c2828..7e0beed495 100644
--- a/drivers/common/cnxk/roc_idev.h
+++ b/drivers/common/cnxk/roc_idev.h
@@ -17,4 +17,6 @@ void __roc_api roc_idev_cpt_set(struct roc_cpt *cpt);
 
 struct roc_nix *__roc_api roc_idev_npa_nix_get(void);
 
+uint64_t *__roc_api roc_nix_inl_outb_ring_base_get(struct roc_nix *roc_nix);
+
 #endif /* _ROC_IDEV_H_ */
diff --git a/drivers/common/cnxk/roc_ie_ot.h b/drivers/common/cnxk/roc_ie_ot.h
index c502c7983f..537eafbe79 100644
--- a/drivers/common/cnxk/roc_ie_ot.h
+++ b/drivers/common/cnxk/roc_ie_ot.h
@@ -176,6 +176,28 @@ enum {
(PLT_ALIGN_CEIL(ROC_AR_WIN_SIZE_MAX, BITS_PER_LONG_LONG) / \
 BITS_PER_LONG_LONG)
 
+#define ROC_IPSEC_ERR_RING_MAX_ENTRY 65536
+
+union roc_ot_ipsec_err_ring_head {
+   uint64_t u64;
+   struct {
+   uint16_t tail_pos;
+   uint16_t tail_gen;
+   uint16_t head_pos;
+   uint16_t head_gen;
+   } s;
+};
+
+union roc_ot_ipsec_err_ring_entry {
+   uint64_t u64;
+   struct {
+   uint64_t data0 : 44;
+   uint64_t data1 : 9;
+   uint64_t rsvd : 3;
+   uint64_t comp_code : 8;
+   } s;
+};
+
 /* Common bit fields between inbound and outbound SA */
 union roc_ot_ipsec_sa_word2 {
struct {
@@ -428,7 +450,8 @@ struct roc_ot_ipsec_outb_sa {
uint64_t count_mib_pkts : 1;
uint64_t hw_ctx_off : 7;
 
-   uint64_t rsvd1 : 32;
+   uint64_t ctx_id : 16;
+   uint64_t rsvd1 : 16;
 
uint64_t ctx_push_size : 7;
uint64_t rsvd2 : 1;
diff --git a/drivers/common/cnxk/roc_nix_inl.c 
b/drivers/common/cnxk/roc_nix_inl.c
index e8981c4aa4..0d9e5dd5c4 100644
--- a/drivers/common/cnxk/roc_nix_inl.c
+++ b/drivers/common/cnxk/roc_nix_inl.c
@@ -5,6 +5,8 @@
 #include "roc_api.h"
 #include "roc_priv.h"
 
+uint32_t soft_exp_consumer_cnt;
+
 PLT_STATIC_ASSERT(ROC_NIX_INL_ONF_IPSEC_INB_SA_SZ ==
  1UL << ROC_NIX_INL_ONF_IPSEC_INB_SA_SZ_LOG2);
 PLT_STATIC_ASSERT(ROC_NIX_INL_ONF_IPSEC_INB_SA_SZ == 512);
@@ -394,6 +396,34 @@ roc_nix_inl_outb_init(struct roc_nix *roc_nix)
nix->nb_cpt_lf = nb_lf;
nix->outb_err_sso_pffunc = sso_pffunc;
nix->inl_outb_ena = true;
+   nix->outb_se_ring_cnt =
+   roc_nix->ipsec_out_max_sa / ROC_IPSEC_ERR_RING_MAX_ENTRY + 1;
+   nix->outb_se_ring_base =
+   roc_nix->port_id * ROC_NIX_SOFT_EXP_PER_PORT_MAX_RINGS;
+
+   if (inl_dev == NULL) {
+   nix->outb_se_ring_cnt = 0;
+   return 0;
+   }
+
+   /* Allocate memory to be used as a ring buffer to poll for
+* soft expiry event from ucode
+*/
+   for (i = 0; i < nix->outb_se_ring_cnt; i++) {
+   inl_dev->sa_soft_exp_ring[nix->outb_se_ring_base + i] =
+   plt_zmalloc((ROC_IPSEC_ERR_RING_MAX_ENTRY + 1) *
+   sizeof(uint64_t),
+ 

[PATCH v2 1/1] net/cnxk: support outbound soft expiry notification

2022-02-24 Thread Vamsi Attunuru
Patch implements a soft expiry notification mechanism in the outbound
path by creating the required number of ring buffers and a common poll
thread which polls for soft expiry events enqueued by the microcode.

Signed-off-by: Vamsi Attunuru 
---
v2: fix subject and commit message.
---
 drivers/common/cnxk/roc_idev.c|  15 +++
 drivers/common/cnxk/roc_idev.h|   2 +
 drivers/common/cnxk/roc_ie_ot.h   |  25 +++-
 drivers/common/cnxk/roc_nix_inl.c |  71 +++
 drivers/common/cnxk/roc_nix_inl.h |  20 ++-
 drivers/common/cnxk/roc_nix_inl_dev.c | 146 +-
 drivers/common/cnxk/roc_nix_inl_dev_irq.c |   2 +-
 drivers/common/cnxk/roc_nix_inl_priv.h|  13 ++
 drivers/common/cnxk/roc_nix_priv.h|   2 +
 drivers/common/cnxk/roc_platform.h|   3 +
 drivers/common/cnxk/version.map   |   2 +
 drivers/net/cnxk/cn10k_ethdev.h   |   3 +-
 drivers/net/cnxk/cn10k_ethdev_sec.c   |  52 +++-
 drivers/net/cnxk/cnxk_ethdev.c|   4 +
 drivers/net/cnxk/cnxk_ethdev_sec.c|   1 +
 15 files changed, 352 insertions(+), 9 deletions(-)

diff --git a/drivers/common/cnxk/roc_idev.c b/drivers/common/cnxk/roc_idev.c
index b1f38fb5fc..a08c7ce8fd 100644
--- a/drivers/common/cnxk/roc_idev.c
+++ b/drivers/common/cnxk/roc_idev.c
@@ -185,6 +185,21 @@ roc_idev_cpt_get(void)
return NULL;
 }
 
+uint64_t *
+roc_nix_inl_outb_ring_base_get(struct roc_nix *roc_nix)
+{
+   struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+   struct idev_cfg *idev = idev_get_cfg();
+   struct nix_inl_dev *inl_dev;
+
+   if (!idev || !idev->nix_inl_dev)
+   return NULL;
+
+   inl_dev = idev->nix_inl_dev;
+
+   return (uint64_t *)&inl_dev->sa_soft_exp_ring[nix->outb_se_ring_base];
+}
+
 void
 roc_idev_cpt_set(struct roc_cpt *cpt)
 {
diff --git a/drivers/common/cnxk/roc_idev.h b/drivers/common/cnxk/roc_idev.h
index 16793c2828..7e0beed495 100644
--- a/drivers/common/cnxk/roc_idev.h
+++ b/drivers/common/cnxk/roc_idev.h
@@ -17,4 +17,6 @@ void __roc_api roc_idev_cpt_set(struct roc_cpt *cpt);
 
 struct roc_nix *__roc_api roc_idev_npa_nix_get(void);
 
+uint64_t *__roc_api roc_nix_inl_outb_ring_base_get(struct roc_nix *roc_nix);
+
 #endif /* _ROC_IDEV_H_ */
diff --git a/drivers/common/cnxk/roc_ie_ot.h b/drivers/common/cnxk/roc_ie_ot.h
index 202e2d2f39..173cc2c667 100644
--- a/drivers/common/cnxk/roc_ie_ot.h
+++ b/drivers/common/cnxk/roc_ie_ot.h
@@ -176,6 +176,28 @@ enum {
(PLT_ALIGN_CEIL(ROC_AR_WIN_SIZE_MAX, BITS_PER_LONG_LONG) / \
 BITS_PER_LONG_LONG)
 
+#define ROC_IPSEC_ERR_RING_MAX_ENTRY 65536
+
+union roc_ot_ipsec_err_ring_head {
+   uint64_t u64;
+   struct {
+   uint16_t tail_pos;
+   uint16_t tail_gen;
+   uint16_t head_pos;
+   uint16_t head_gen;
+   } s;
+};
+
+union roc_ot_ipsec_err_ring_entry {
+   uint64_t u64;
+   struct {
+   uint64_t data0 : 44;
+   uint64_t data1 : 9;
+   uint64_t rsvd : 3;
+   uint64_t comp_code : 8;
+   } s;
+};
+
 /* Common bit fields between inbound and outbound SA */
 union roc_ot_ipsec_sa_word2 {
struct {
@@ -429,7 +451,8 @@ struct roc_ot_ipsec_outb_sa {
uint64_t count_mib_pkts : 1;
uint64_t hw_ctx_off : 7;
 
-   uint64_t rsvd1 : 32;
+   uint64_t ctx_id : 16;
+   uint64_t rsvd1 : 16;
 
uint64_t ctx_push_size : 7;
uint64_t rsvd2 : 1;
diff --git a/drivers/common/cnxk/roc_nix_inl.c 
b/drivers/common/cnxk/roc_nix_inl.c
index 7bf89a44c1..fb08d3deb2 100644
--- a/drivers/common/cnxk/roc_nix_inl.c
+++ b/drivers/common/cnxk/roc_nix_inl.c
@@ -5,6 +5,8 @@
 #include "roc_api.h"
 #include "roc_priv.h"
 
+uint32_t soft_exp_consumer_cnt;
+
 PLT_STATIC_ASSERT(ROC_NIX_INL_ONF_IPSEC_INB_SA_SZ ==
  1UL << ROC_NIX_INL_ONF_IPSEC_INB_SA_SZ_LOG2);
 PLT_STATIC_ASSERT(ROC_NIX_INL_ONF_IPSEC_INB_SA_SZ == 512);
@@ -430,6 +432,34 @@ roc_nix_inl_outb_init(struct roc_nix *roc_nix)
nix->nb_cpt_lf = nb_lf;
nix->outb_err_sso_pffunc = sso_pffunc;
nix->inl_outb_ena = true;
+   nix->outb_se_ring_cnt =
+   roc_nix->ipsec_out_max_sa / ROC_IPSEC_ERR_RING_MAX_ENTRY + 1;
+   nix->outb_se_ring_base =
+   roc_nix->port_id * ROC_NIX_SOFT_EXP_PER_PORT_MAX_RINGS;
+
+   if (inl_dev == NULL) {
+   nix->outb_se_ring_cnt = 0;
+   return 0;
+   }
+
+   /* Allocate memory to be used as a ring buffer to poll for
+* soft expiry event from ucode
+*/
+   for (i = 0; i < nix->outb_se_ring_cnt; i++) {
+   inl_dev->sa_soft_exp_ring[nix->outb_se_ring_base + i] =
+   plt_zmalloc((ROC_IPSEC_ERR_RIN

[PATCH 1/1] net/cnxk: make inline inbound device usage default

2022-02-24 Thread Vamsi Attunuru
Currently, inline inbound device usage is not the default for eventdev.
Patch renames the force_inl_dev devarg to no_inl_dev and enables the
inline inbound device by default.

Signed-off-by: Vamsi Attunuru 
---
 doc/guides/nics/cnxk.rst | 10 +-
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  4 ++--
 drivers/net/cnxk/cn9k_ethdev.c   |  1 +
 drivers/net/cnxk/cnxk_ethdev.h   |  4 ++--
 drivers/net/cnxk/cnxk_ethdev_devargs.c   | 11 +--
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/doc/guides/nics/cnxk.rst b/doc/guides/nics/cnxk.rst
index be51ca2146..31c801fa04 100644
--- a/doc/guides/nics/cnxk.rst
+++ b/doc/guides/nics/cnxk.rst
@@ -275,7 +275,7 @@ Runtime Config Options
With the above configuration, two CPT LF's are setup and distributed among
all the Tx queues for outbound processing.
 
-- ``Force using inline ipsec device for inbound`` (default ``0``)
+- ``Disable using inline ipsec device for inbound`` (default ``0``)
 
In CN10K, in event mode, driver can work in two modes,
 
@@ -285,13 +285,13 @@ Runtime Config Options
2. Both Inbound encrypted traffic and plain traffic post decryption are
   received by ethdev.
 
-   By default event mode works without using inline device i.e mode ``2``.
-   This behaviour can be changed to pick mode ``1`` by using
-   ``force_inb_inl_dev`` ``devargs`` parameter.
+   By default event mode works using inline device i.e mode ``1``.
+   This behaviour can be changed to pick mode ``2`` by using
+   ``no_inl_dev`` ``devargs`` parameter.
 
For example::
 
-  -a 0002:02:00.0,force_inb_inl_dev=1 -a 0002:03:00.0,force_inb_inl_dev=1
+  -a 0002:02:00.0,no_inl_dev=1 -a 0002:03:00.0,no_inl_dev=1
 
With the above configuration, inbound encrypted traffic from both the ports
is received by ipsec inline device.
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c 
b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index 5ebd3340e7..42ac14064d 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -263,9 +263,9 @@ cnxk_sso_rx_adapter_queue_add(
 
/* Switch to use PF/VF's NIX LF instead of inline device for inbound
 * when all the RQ's are switched to event dev mode. We do this only
-* when using inline device is not forced by dev args.
+* when dev arg no_inl_dev=1 is selected.
 */
-   if (!cnxk_eth_dev->inb.force_inl_dev &&
+   if (cnxk_eth_dev->inb.no_inl_dev &&
cnxk_eth_dev->nb_rxq_sso == cnxk_eth_dev->nb_rxq)
cnxk_nix_inb_mode_set(cnxk_eth_dev, false);
 
diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
index 6b049b2897..ae42d76d6d 100644
--- a/drivers/net/cnxk/cn9k_ethdev.c
+++ b/drivers/net/cnxk/cn9k_ethdev.c
@@ -594,6 +594,7 @@ cn9k_nix_probe(struct rte_pci_driver *pci_drv, struct 
rte_pci_device *pci_dev)
}
 
dev->hwcap = 0;
+   dev->inb.no_inl_dev = 1;
 
/* Register up msg callbacks for PTP information */
roc_nix_ptp_info_cb_register(&dev->nix, cn9k_nix_ptp_info_update_cb);
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 445b7abf69..9a9d3baf25 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -272,8 +272,8 @@ struct cnxk_eth_dev_sec_inb {
/* Using inbound with inline device */
bool inl_dev;
 
-   /* Device argument to force inline device for inb */
-   bool force_inl_dev;
+   /* Device argument to disable inline device usage for inb */
+   bool no_inl_dev;
 
/* Active sessions */
uint16_t nb_sess;
diff --git a/drivers/net/cnxk/cnxk_ethdev_devargs.c b/drivers/net/cnxk/cnxk_ethdev_devargs.c
index 8a71644899..9b2beb6743 100644
--- a/drivers/net/cnxk/cnxk_ethdev_devargs.c
+++ b/drivers/net/cnxk/cnxk_ethdev_devargs.c
@@ -241,7 +241,7 @@ parse_sdp_channel_mask(const char *key, const char *value, void *extra_args)
 #define CNXK_IPSEC_IN_MAX_SPI  "ipsec_in_max_spi"
 #define CNXK_IPSEC_OUT_MAX_SA  "ipsec_out_max_sa"
 #define CNXK_OUTB_NB_DESC  "outb_nb_desc"
-#define CNXK_FORCE_INB_INL_DEV "force_inb_inl_dev"
+#define CNXK_NO_INL_DEV        "no_inl_dev"
 #define CNXK_OUTB_NB_CRYPTO_QS "outb_nb_crypto_qs"
 #define CNXK_SDP_CHANNEL_MASK  "sdp_channel_mask"
 #define CNXK_FLOW_PRE_L2_INFO  "flow_pre_l2_info"
@@ -257,7 +257,6 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)
uint16_t flow_prealloc_size = 1;
uint16_t switch_header_type = 0;
uint16_t flow_max_priority = 3;
-   uint16_t force_inb_inl_dev = 0;
uint16_t outb_nb_crypto_qs = 1;
uint32_t ipsec_in_min_spi = 0;
uint16_t outb_nb_desc = 8200;
@@ -266,6 +265,7 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)
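
For context, the devargs above are handled with the standard rte_kvargs
helpers. Below is a minimal sketch of how a flag devarg such as
no_inl_dev=1 is typically parsed; the function and handler names are
illustrative, not the driver's actual symbols:

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

#include <rte_kvargs.h>

/* Hypothetical handler: copies a numeric flag value into extra_args. */
static int
parse_flag(const char *key, const char *value, void *extra_args)
{
    (void)key;
    *(uint16_t *)extra_args = (uint16_t)atoi(value);
    return 0;
}

/* Hypothetical wrapper: extracts no_inl_dev from a devargs string. */
static int
parse_no_inl_dev(const char *args, uint16_t *no_inl_dev)
{
    static const char * const keys[] = { "no_inl_dev", NULL };
    struct rte_kvargs *kvlist;

    kvlist = rte_kvargs_parse(args, keys);
    if (kvlist == NULL)
        return -EINVAL;

    rte_kvargs_process(kvlist, "no_inl_dev", parse_flag, no_inl_dev);
    rte_kvargs_free(kvlist);
    return 0;
}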

[PATCH 1/1] common/cnxk: fix static assertion failure

2022-03-02 Thread Vamsi Attunuru
Use dynamically allocated memory for storing the soft expiry ring
base addresses, which fixes the static assertion failure; the size of
this allocation depends on RTE_MAX_ETHPORTS, which varies with the
build config.

Fixes: d26185716d3f ("net/cnxk: support outbound soft expiry notification")

Signed-off-by: Vamsi Attunuru 
Signed-off-by: Srikanth Yalavarthi 
---
 drivers/common/cnxk/roc_nix_inl.c  | 23 +--
 drivers/common/cnxk/roc_nix_inl.h  |  2 +-
 drivers/common/cnxk/roc_nix_inl_dev.c  | 11 ++-
 drivers/common/cnxk/roc_nix_inl_priv.h |  2 +-
 drivers/common/cnxk/roc_platform.h | 16 
 5 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/drivers/common/cnxk/roc_nix_inl.c b/drivers/common/cnxk/roc_nix_inl.c
index 11ed157703..826c6e99c1 100644
--- a/drivers/common/cnxk/roc_nix_inl.c
+++ b/drivers/common/cnxk/roc_nix_inl.c
@@ -330,12 +330,13 @@ roc_nix_inl_outb_init(struct roc_nix *roc_nix)
struct dev *dev = &nix->dev;
struct msix_offset_rsp *rsp;
struct nix_inl_dev *inl_dev;
+   size_t sa_sz, ring_sz;
uint16_t sso_pffunc;
uint8_t eng_grpmask;
uint64_t blkaddr, i;
+   uint64_t *ring_base;
uint16_t nb_lf;
void *sa_base;
-   size_t sa_sz;
int j, rc;
void *sa;
 
@@ -468,16 +469,16 @@ roc_nix_inl_outb_init(struct roc_nix *roc_nix)
/* Allocate memory to be used as a ring buffer to poll for
 * soft expiry event from ucode
 */
+   ring_sz = (ROC_IPSEC_ERR_RING_MAX_ENTRY + 1) * sizeof(uint64_t);
+   ring_base = inl_dev->sa_soft_exp_ring;
for (i = 0; i < nix->outb_se_ring_cnt; i++) {
-   inl_dev->sa_soft_exp_ring[nix->outb_se_ring_base + i] =
-   plt_zmalloc((ROC_IPSEC_ERR_RING_MAX_ENTRY + 1) *
-   sizeof(uint64_t),
-   0);
-   if (!inl_dev->sa_soft_exp_ring[i]) {
+   ring_base[nix->outb_se_ring_base + i] =
+   PLT_U64_CAST(plt_zmalloc(ring_sz, 0));
+   if (!ring_base[nix->outb_se_ring_base + i]) {
plt_err("Couldn't allocate memory for soft exp ring");
while (i--)
-   plt_free(inl_dev->sa_soft_exp_ring
-[nix->outb_se_ring_base + i]);
+   plt_free(PLT_PTR_CAST(
+   ring_base[nix->outb_se_ring_base + i]));
rc = -ENOMEM;
goto lf_fini;
}
@@ -504,6 +505,7 @@ roc_nix_inl_outb_fini(struct roc_nix *roc_nix)
struct idev_cfg *idev = idev_get_cfg();
struct dev *dev = &nix->dev;
struct nix_inl_dev *inl_dev;
+   uint64_t *ring_base;
int i, rc, ret = 0;
 
if (!nix->inl_outb_ena)
@@ -537,10 +539,11 @@ roc_nix_inl_outb_fini(struct roc_nix *roc_nix)
 
if (idev && idev->nix_inl_dev) {
inl_dev = idev->nix_inl_dev;
+   ring_base = inl_dev->sa_soft_exp_ring;
 
for (i = 0; i < ROC_NIX_INL_MAX_SOFT_EXP_RNGS; i++) {
-   if (inl_dev->sa_soft_exp_ring[i])
-   plt_free(inl_dev->sa_soft_exp_ring[i]);
+   if (ring_base[i])
+   plt_free(PLT_PTR_CAST(ring_base[i]));
}
}
 
diff --git a/drivers/common/cnxk/roc_nix_inl.h b/drivers/common/cnxk/roc_nix_inl.h
index 1dc58f2da2..2c2a4d76f2 100644
--- a/drivers/common/cnxk/roc_nix_inl.h
+++ b/drivers/common/cnxk/roc_nix_inl.h
@@ -137,7 +137,7 @@ struct roc_nix_inl_dev {
bool set_soft_exp_poll;
/* End of input parameters */
 
-#define ROC_NIX_INL_MEM_SZ (2304)
+#define ROC_NIX_INL_MEM_SZ (1280)
uint8_t reserved[ROC_NIX_INL_MEM_SZ] __plt_cache_aligned;
 } __plt_cache_aligned;
 
diff --git a/drivers/common/cnxk/roc_nix_inl_dev.c b/drivers/common/cnxk/roc_nix_inl_dev.c
index 1cfcdba3f2..5a032aab52 100644
--- a/drivers/common/cnxk/roc_nix_inl_dev.c
+++ b/drivers/common/cnxk/roc_nix_inl_dev.c
@@ -653,7 +653,7 @@ inl_outb_soft_exp_poll(struct nix_inl_dev *inl_dev, uint32_t ring_idx)
uint32_t port_id;
 
port_id = ring_idx / ROC_NIX_SOFT_EXP_PER_PORT_MAX_RINGS;
-   ring_base = inl_dev->sa_soft_exp_ring[ring_idx];
+   ring_base = PLT_PTR_CAST(inl_dev->sa_soft_exp_ring[ring_idx]);
if (!ring_base) {
plt_err("Invalid soft exp ring base");
return;
@@ -751,6 +751,14 @@ nix_inl_outb_poll_thread_setup(struct nix_inl_dev *inl_dev)
 
inl_dev->soft_exp_ring_bmap_mem = mem;
inl_dev->soft_exp_ring_bmap = bmap;
inl_dev->sa_soft_exp_ring = plt_zmalloc(
+   ROC_NIX_INL_MAX_SOFT_EXP_RNGS * sizeof(uint64_t), 0);
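
For illustration, a minimal sketch of the pattern this fix relies on,
with illustrative names and sizes standing in for the driver's own
(PLT_STATIC_ASSERT plays the role of _Static_assert below): the public
struct reserves a fixed opaque area for private state, and replacing a
per-port static array with a pointer allocated at init keeps the
private struct's size independent of RTE_MAX_ETHPORTS, so the
compile-time assert holds for any build config.

#include <stdint.h>

#define PRIV_MEM_SZ 1280    /* illustrative, like ROC_NIX_INL_MEM_SZ */

struct pub_dev {
    /* ... input parameters ... */
    uint8_t reserved[PRIV_MEM_SZ];  /* opaque area for private state */
};

struct priv_state {
    /* was: a static array scaled by RTE_MAX_ETHPORTS; now a pointer
     * whose backing memory is allocated at init time.
     */
    uint64_t *sa_soft_exp_ring;
};

/* Build breaks here if private state outgrows the reserved area. */
_Static_assert(sizeof(struct priv_state) <= PRIV_MEM_SZ,
               "private state must fit the reserved area");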

[PATCH v2 1/1] common/cnxk: fix static assertion failure

2022-03-02 Thread Vamsi Attunuru
Use dynamically allocated memory for storing the soft expiry ring
base addresses, which fixes the static assertion failure, as the size
of the allocation depends on RTE_MAX_ETHPORTS, which varies with the
build config.

Bugzilla ID: 940
Fixes: d26185716d3f ("net/cnxk: support outbound soft expiry notification")
Cc: sta...@dpdk.org

Reported-by: Wei Ling 
Reported-by: Yu Jiang 
Signed-off-by: Vamsi Attunuru 
Signed-off-by: Srikanth Yalavarthi 
---
V2: Add bugzilla & reportee details, remove unused changes.
---
 drivers/common/cnxk/roc_nix_inl.c  | 23 +--
 drivers/common/cnxk/roc_nix_inl.h  |  2 +-
 drivers/common/cnxk/roc_nix_inl_dev.c  | 11 ++-
 drivers/common/cnxk/roc_nix_inl_priv.h |  2 +-
 drivers/common/cnxk/roc_platform.h |  7 +++
 5 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/drivers/common/cnxk/roc_nix_inl.c b/drivers/common/cnxk/roc_nix_inl.c
index 11ed157703..826c6e99c1 100644
--- a/drivers/common/cnxk/roc_nix_inl.c
+++ b/drivers/common/cnxk/roc_nix_inl.c
@@ -330,12 +330,13 @@ roc_nix_inl_outb_init(struct roc_nix *roc_nix)
struct dev *dev = &nix->dev;
struct msix_offset_rsp *rsp;
struct nix_inl_dev *inl_dev;
+   size_t sa_sz, ring_sz;
uint16_t sso_pffunc;
uint8_t eng_grpmask;
uint64_t blkaddr, i;
+   uint64_t *ring_base;
uint16_t nb_lf;
void *sa_base;
-   size_t sa_sz;
int j, rc;
void *sa;
 
@@ -468,16 +469,16 @@ roc_nix_inl_outb_init(struct roc_nix *roc_nix)
/* Allocate memory to be used as a ring buffer to poll for
 * soft expiry event from ucode
 */
+   ring_sz = (ROC_IPSEC_ERR_RING_MAX_ENTRY + 1) * sizeof(uint64_t);
+   ring_base = inl_dev->sa_soft_exp_ring;
for (i = 0; i < nix->outb_se_ring_cnt; i++) {
-   inl_dev->sa_soft_exp_ring[nix->outb_se_ring_base + i] =
-   plt_zmalloc((ROC_IPSEC_ERR_RING_MAX_ENTRY + 1) *
-   sizeof(uint64_t),
-   0);
-   if (!inl_dev->sa_soft_exp_ring[i]) {
+   ring_base[nix->outb_se_ring_base + i] =
+   PLT_U64_CAST(plt_zmalloc(ring_sz, 0));
+   if (!ring_base[nix->outb_se_ring_base + i]) {
plt_err("Couldn't allocate memory for soft exp ring");
while (i--)
-   plt_free(inl_dev->sa_soft_exp_ring
-[nix->outb_se_ring_base + i]);
+   plt_free(PLT_PTR_CAST(
+   ring_base[nix->outb_se_ring_base + i]));
rc = -ENOMEM;
goto lf_fini;
}
@@ -504,6 +505,7 @@ roc_nix_inl_outb_fini(struct roc_nix *roc_nix)
struct idev_cfg *idev = idev_get_cfg();
struct dev *dev = &nix->dev;
struct nix_inl_dev *inl_dev;
+   uint64_t *ring_base;
int i, rc, ret = 0;
 
if (!nix->inl_outb_ena)
@@ -537,10 +539,11 @@ roc_nix_inl_outb_fini(struct roc_nix *roc_nix)
 
if (idev && idev->nix_inl_dev) {
inl_dev = idev->nix_inl_dev;
+   ring_base = inl_dev->sa_soft_exp_ring;
 
for (i = 0; i < ROC_NIX_INL_MAX_SOFT_EXP_RNGS; i++) {
-   if (inl_dev->sa_soft_exp_ring[i])
-   plt_free(inl_dev->sa_soft_exp_ring[i]);
+   if (ring_base[i])
+   plt_free(PLT_PTR_CAST(ring_base[i]));
}
}
 
diff --git a/drivers/common/cnxk/roc_nix_inl.h b/drivers/common/cnxk/roc_nix_inl.h
index 1dc58f2da2..2c2a4d76f2 100644
--- a/drivers/common/cnxk/roc_nix_inl.h
+++ b/drivers/common/cnxk/roc_nix_inl.h
@@ -137,7 +137,7 @@ struct roc_nix_inl_dev {
bool set_soft_exp_poll;
/* End of input parameters */
 
-#define ROC_NIX_INL_MEM_SZ (2304)
+#define ROC_NIX_INL_MEM_SZ (1280)
uint8_t reserved[ROC_NIX_INL_MEM_SZ] __plt_cache_aligned;
 } __plt_cache_aligned;
 
diff --git a/drivers/common/cnxk/roc_nix_inl_dev.c b/drivers/common/cnxk/roc_nix_inl_dev.c
index 1cfcdba3f2..5a032aab52 100644
--- a/drivers/common/cnxk/roc_nix_inl_dev.c
+++ b/drivers/common/cnxk/roc_nix_inl_dev.c
@@ -653,7 +653,7 @@ inl_outb_soft_exp_poll(struct nix_inl_dev *inl_dev, uint32_t ring_idx)
uint32_t port_id;
 
port_id = ring_idx / ROC_NIX_SOFT_EXP_PER_PORT_MAX_RINGS;
-   ring_base = inl_dev->sa_soft_exp_ring[ring_idx];
+   ring_base = PLT_PTR_CAST(inl_dev->sa_soft_exp_ring[ring_idx]);
if (!ring_base) {
plt_err("Invalid soft exp ring base");
return;
@@ -751,6 +751,14 @@ nix_inl_outb_poll_thread_setup(struct nix_inl_dev *inl_dev)
 
+   inl_dev->sa_soft_exp_ring = plt_zmalloc(
+   ROC_NIX_INL_MAX_SOFT_EXP_RNGS * sizeof(uint64_t), 0);

[PATCH 1/1] ethdev: add packet expiry event subtype

2022-03-17 Thread Vamsi Attunuru
This patch adds a new event subtype for notifying applications upon
soft packet-limit expiry of an SA.

Signed-off-by: Vamsi Attunuru 
---
 lib/ethdev/rte_ethdev.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 04cff8ee10..07d1f02bae 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -3828,6 +3828,8 @@ enum rte_eth_event_ipsec_subtype {
RTE_ETH_EVENT_IPSEC_SA_TIME_EXPIRY,
/** Soft byte expiry of SA */
RTE_ETH_EVENT_IPSEC_SA_BYTE_EXPIRY,
+   /** Soft packet expiry of SA */
+   RTE_ETH_EVENT_IPSEC_SA_PKT_EXPIRY,
/** Max value of this enum */
RTE_ETH_EVENT_IPSEC_MAX
 };
@@ -3849,6 +3851,7 @@ struct rte_eth_event_ipsec_desc {
 * - @ref RTE_ETH_EVENT_IPSEC_ESN_OVERFLOW
 * - @ref RTE_ETH_EVENT_IPSEC_SA_TIME_EXPIRY
 * - @ref RTE_ETH_EVENT_IPSEC_SA_BYTE_EXPIRY
+* - @ref RTE_ETH_EVENT_IPSEC_SA_PKT_EXPIRY
 *
 * @see struct rte_security_session_conf
 *
-- 
2.25.1
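
A minimal sketch, assuming the standard ethdev event callback API, of
how an application might consume the new subtype; the rekey comment is
application policy, not something this patch mandates:

#include <inttypes.h>
#include <stdio.h>

#include <rte_ethdev.h>

/* Invoked for RTE_ETH_EVENT_IPSEC; ret_param carries the IPsec
 * event descriptor.
 */
static int
ipsec_event_cb(uint16_t port_id, enum rte_eth_event_type event,
               void *cb_arg, void *ret_param)
{
    struct rte_eth_event_ipsec_desc *desc = ret_param;

    (void)cb_arg;
    if (event != RTE_ETH_EVENT_IPSEC || desc == NULL)
        return 0;

    if (desc->subtype == RTE_ETH_EVENT_IPSEC_SA_PKT_EXPIRY) {
        /* desc->metadata identifies the SA; e.g. trigger a rekey */
        printf("port %u: SA soft packet limit expired (md=0x%" PRIx64 ")\n",
               port_id, (uint64_t)desc->metadata);
    }
    return 0;
}

/* Registered once per port, after the port is configured. */
static int
register_ipsec_events(uint16_t port_id)
{
    return rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_IPSEC,
                                         ipsec_event_cb, NULL);
}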



[dpdk-dev] [PATCH v1 1/1] app/test: fix --socket-mem option in eal flag autotest

2019-04-10 Thread Vamsi Attunuru
"argv2[]" positive test case fails with RTE_MAX_NUMA_NODES=1 config
because of "--socket-mem=0,0,0,0" option, which passes memory sizes
for multiple sockets. This patch fixes the issue by passing memory
size for node 0 alone.

Signed-off-by: Vamsi Attunuru 
---
 app/test/test_eal_flags.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/test/test_eal_flags.c b/app/test/test_eal_flags.c
index 9112c96..90df7b3 100644
--- a/app/test/test_eal_flags.c
+++ b/app/test/test_eal_flags.c
@@ -1161,7 +1161,7 @@ test_memory_flags(void)
 
/* valid (zero) --socket-mem flag */
const char *argv2[] = {prgname, "-c", "10", "-n", "2",
-   "--file-prefix=" memtest, "--socket-mem=0,0,0,0"};
+   "--file-prefix=" memtest, "--socket-mem=0"};
 
/* invalid (incomplete) --socket-mem flag */
const char *argv3[] = {prgname, "-c", "10", "-n", "2",
-- 
2.8.4
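
For reference, --socket-mem takes one comma-separated size in megabytes
per NUMA socket, so a build with RTE_MAX_NUMA_NODES=1 accepts only a
single value. For example (binary name illustrative):

  # single-socket build: one size only
  ./dpdk-app -c 10 -n 2 --socket-mem=0

  # dual-socket system: one size per socket
  ./dpdk-app -c 10 -n 2 --socket-mem=1024,1024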