[dpdk-dev] [PATCH v3 0/4] support async dequeue for split ring

2021-09-28 Thread Wenwu Ma
This patch implements asynchronous dequeue data path for split ring.
A new asynchronous dequeue function is introduced. With this function,
the application can try to receive packets from the guest with offloading
copies to the DMA engine, thus saving precious CPU cycles.

v3:
- Update release note.
- Update function comments.

v2:
- Removed struct async_nethdr in 1/4.
- Removed a useless function declaration in 2/4,
  and fixed some coding style in 4/4.

Wenwu Ma (3):
  examples/vhost: refactor vhost enqueue and dequeue datapaths
  examples/vhost: use a new API to query remaining ring space
  examples/vhost: support vhost async dequeue data path

Yuan Wang (1):
  vhost: support async dequeue for split ring

 doc/guides/prog_guide/vhost_lib.rst|   9 +
 doc/guides/rel_notes/release_21_11.rst |   3 +
 doc/guides/sample_app_ug/vhost.rst |   9 +-
 examples/vhost/ioat.c  |  67 +++-
 examples/vhost/ioat.h  |  25 ++
 examples/vhost/main.c  | 269 -
 examples/vhost/main.h  |  34 +-
 examples/vhost/virtio_net.c|  16 +-
 lib/vhost/rte_vhost_async.h|  34 +-
 lib/vhost/version.map  |   3 +
 lib/vhost/vhost.h  |   3 +-
 lib/vhost/virtio_net.c | 530 +
 12 files changed, 881 insertions(+), 121 deletions(-)

-- 
2.25.1



[dpdk-dev] [PATCH v3 1/4] vhost: support async dequeue for split ring

2021-09-28 Thread Wenwu Ma
From: Yuan Wang 

This patch implements asynchronous dequeue data path for split ring.
A new asynchronous dequeue function is introduced. With this function,
the application can try to receive packets from the guest with
offloading copies to the async channel, thus saving precious CPU
cycles.

Signed-off-by: Yuan Wang 
Signed-off-by: Jiayu Hu 
Signed-off-by: Wenwu Ma 
Tested-by: Yinan Wang 
Tested-by: Yvonne Yang 
---
 doc/guides/prog_guide/vhost_lib.rst|   9 +
 doc/guides/rel_notes/release_21_11.rst |   3 +
 lib/vhost/rte_vhost_async.h|  34 +-
 lib/vhost/version.map  |   3 +
 lib/vhost/vhost.h  |   3 +-
 lib/vhost/virtio_net.c | 530 +
 6 files changed, 579 insertions(+), 3 deletions(-)

diff --git a/doc/guides/prog_guide/vhost_lib.rst 
b/doc/guides/prog_guide/vhost_lib.rst
index 171e0096f6..9ed544db7a 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -303,6 +303,15 @@ The following is an overview of some key Vhost API 
functions:
   Clear inflight packets which are submitted to DMA engine in vhost async data
   path. Completed packets are returned to applications through ``pkts``.
 
+* ``rte_vhost_async_try_dequeue_burst(vid, queue_id, mbuf_pool, pkts, count, 
nr_inflight)``
+
+  This function tries to receive packets from the guest with offloading
+  copies to the async channel. Packets whose copies have been completed
+  are returned in ``pkts``. Packets whose copies have been submitted
+  to the async channel but are not yet completed are called "in-flight packets".
+  This function will not return in-flight packets until their copies are
+  completed by the async channel.
+
 Vhost-user Implementations
 --
 
diff --git a/doc/guides/rel_notes/release_21_11.rst 
b/doc/guides/rel_notes/release_21_11.rst
index ad7c1afec0..79e4297ff9 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -91,6 +91,9 @@ New Features
   Added command-line options to specify total number of processes and
   current process ID. Each process owns subset of Rx and Tx queues.
 
+* **Added support for vhost async split ring data path.**
+
+  Added async dequeue support for split ring in vhost async data path.
 
 Removed Items
 -
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index ad71555a7f..703c81753a 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -84,11 +84,12 @@ struct rte_vhost_async_channel_ops {
 };
 
 /**
- * inflight async packet information
+ * in-flight async packet information
  */
 struct async_inflight_info {
struct rte_mbuf *mbuf;
-   uint16_t descs; /* num of descs inflight */
+   struct virtio_net_hdr nethdr;
+   uint16_t descs; /* num of descs in-flight */
uint16_t nr_buffers; /* num of buffers inflight for packed ring */
 };
 
@@ -256,4 +257,33 @@ __rte_experimental
 uint16_t rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
 
+/**
+ * This function tries to receive packets from the guest with offloading
+ * copies to the async channel. Packets whose copies have been completed
+ * are returned in "pkts". Packets whose copies have been submitted to
+ * the async channel but are not yet completed are called "in-flight packets".
+ * This function will not return in-flight packets until their copies are
+ * completed by the async channel.
+ *
+ * @param vid
+ *  ID of vhost device to dequeue data
+ * @param queue_id
+ *  ID of virtqueue to dequeue data
+ * @param mbuf_pool
+ *  Mbuf_pool where host mbuf is allocated.
+ * @param pkts
+ *  Blank array to keep successfully dequeued packets
+ * @param count
+ *  Size of the packet array
+ * @param nr_inflight
+ *  The amount of in-flight packets. If error occurred, its value is set to -1.
+ * @return
+ *  Num of successfully dequeued packets
+ */
+__rte_experimental
+uint16_t
+rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
+   struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
+   int *nr_inflight);
+
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 8ebde3f694..8eb7e92c32 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -85,4 +85,7 @@ EXPERIMENTAL {
rte_vhost_async_channel_register_thread_unsafe;
rte_vhost_async_channel_unregister_thread_unsafe;
rte_vhost_clear_queue_thread_unsafe;
+
+   # added in 21.11
+   rte_vhost_async_try_dequeue_burst;
 };
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 1e56311725..89a31e4ca8 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -49,7 +49,8 @@
 #define MAX_PKT_BURST 32
 
 #define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST * 2)
-#define VHOST_MAX_ASYNC_VEC (BUF_VECTOR_MAX * 4)
+#define MAX_ASYNC_COPY_VECTOR 1024
+#define

[dpdk-dev] [PATCH v3 2/4] examples/vhost: refactor vhost enqueue and dequeue datapaths

2021-09-28 Thread Wenwu Ma
Previously, by judging the flag, we call different enqueue/dequeue
functions in data path.

Now, we use an ops that was initialized when Vhost was created,
so that we can call ops directly in Vhost data path without any more
flag judgment.

Signed-off-by: Wenwu Ma 
Reviewed-by: Maxime Coquelin 
Tested-by: Yvonne Yang 
---
 examples/vhost/main.c   | 100 +---
 examples/vhost/main.h   |  28 --
 examples/vhost/virtio_net.c |  16 +-
 3 files changed, 98 insertions(+), 46 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d0bf1f31e3..254f7097bc 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -106,6 +106,8 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
 static char *socket_files;
 static int nb_sockets;
 
+static struct vhost_queue_ops vdev_queue_ops[MAX_VHOST_DEVICE];
+
 /* empty vmdq configuration structure. Filled in programmatically */
 static struct rte_eth_conf vmdq_conf_default = {
.rxmode = {
@@ -879,22 +881,8 @@ drain_vhost(struct vhost_dev *vdev)
uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
 
-   if (builtin_net_driver) {
-   ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
-   } else if (async_vhost_driver) {
-   uint16_t enqueue_fail = 0;
-
-   complete_async_pkts(vdev);
-   ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m, 
nr_xmit);
-   __atomic_add_fetch(&vdev->pkts_inflight, ret, __ATOMIC_SEQ_CST);
-
-   enqueue_fail = nr_xmit - ret;
-   if (enqueue_fail)
-   free_pkts(&m[ret], nr_xmit - ret);
-   } else {
-   ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
-   m, nr_xmit);
-   }
+   ret = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev,
+   VIRTIO_RXQ, m, nr_xmit);
 
if (enable_stats) {
__atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit,
@@ -1173,6 +1161,33 @@ drain_mbuf_table(struct mbuf_table *tx_q)
}
 }
 
+uint16_t
+async_enqueue_pkts(struct vhost_dev *vdev, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint32_t rx_count)
+{
+   uint16_t enqueue_count;
+   uint16_t enqueue_fail = 0;
+
+   complete_async_pkts(vdev);
+   enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
+   queue_id, pkts, rx_count);
+   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count,
+   __ATOMIC_SEQ_CST);
+
+   enqueue_fail = rx_count - enqueue_count;
+   if (enqueue_fail)
+   free_pkts(&pkts[enqueue_count], enqueue_fail);
+
+   return enqueue_count;
+}
+
+uint16_t
+sync_enqueue_pkts(struct vhost_dev *vdev, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint32_t rx_count)
+{
+   return rte_vhost_enqueue_burst(vdev->vid, queue_id, pkts, rx_count);
+}
+
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1203,25 +1218,8 @@ drain_eth_rx(struct vhost_dev *vdev)
}
}
 
-   if (builtin_net_driver) {
-   enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
-   pkts, rx_count);
-   } else if (async_vhost_driver) {
-   uint16_t enqueue_fail = 0;
-
-   complete_async_pkts(vdev);
-   enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-   VIRTIO_RXQ, pkts, rx_count);
-   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count, 
__ATOMIC_SEQ_CST);
-
-   enqueue_fail = rx_count - enqueue_count;
-   if (enqueue_fail)
-   free_pkts(&pkts[enqueue_count], enqueue_fail);
-
-   } else {
-   enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
-   pkts, rx_count);
-   }
+   enqueue_count = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev,
+   VIRTIO_RXQ, pkts, rx_count);
 
if (enable_stats) {
__atomic_add_fetch(&vdev->stats.rx_total_atomic, rx_count,
@@ -1234,6 +1232,14 @@ drain_eth_rx(struct vhost_dev *vdev)
free_pkts(pkts, rx_count);
 }
 
+uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+   struct rte_mempool *mbuf_pool,
+   struct rte_mbuf **pkts, uint16_t count)
+{
+   return rte_vhost_dequeue_burst(dev->vid, queue_id,
+   mbuf_pool, pkts, count);
+}
+
 static __rte_always_inline void
 drain_virtio_tx(struct vhost_dev *vdev)
 {
@@ -1241,13 +1247,8 @@ drain_virtio_tx(struct vhost_dev *vdev)
uint16_t count;
uint16_t i;
 
-  

[dpdk-dev] [PATCH v3 3/4] examples/vhost: use a new API to query remaining ring space

2021-09-28 Thread Wenwu Ma
A new API for querying the remaining descriptor ring capacity
is available, so we use the new one instead of the old one.

Signed-off-by: Wenwu Ma 
Reviewed-by: Maxime Coquelin 
Reviewed-by: Chenbo Xia 
Tested-by: Yvonne Yang 
---
 examples/vhost/ioat.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 457f8171f0..6adc30b622 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,7 +17,6 @@ struct packet_tracker {
unsigned short next_read;
unsigned short next_write;
unsigned short last_remain;
-   unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +112,6 @@ open_ioat(const char *value)
goto out;
}
rte_rawdev_start(dev_id);
-   cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE - 1;
dma_info->nr++;
i++;
}
@@ -140,7 +138,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
src = descs[i_desc].src;
dst = descs[i_desc].dst;
i_seg = 0;
-   if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+   if (rte_ioat_burst_capacity(dev_id) < src->nr_segs)
break;
while (i_seg < src->nr_segs) {
rte_ioat_enqueue_copy(dev_id,
@@ -155,7 +153,6 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
}
write &= mask;
cb_tracker[dev_id].size_track[write] = src->nr_segs;
-   cb_tracker[dev_id].ioat_space -= src->nr_segs;
write++;
}
} else {
@@ -194,7 +191,6 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
if (n_seg == 0)
return 0;
 
-   cb_tracker[dev_id].ioat_space += n_seg;
n_seg += cb_tracker[dev_id].last_remain;
 
read = cb_tracker[dev_id].next_read;
-- 
2.25.1



[dpdk-dev] [PATCH v3 4/4] examples/vhost: support vhost async dequeue data path

2021-09-28 Thread Wenwu Ma
This patch is to add vhost async dequeue data-path in vhost sample.
vswitch can leverage IOAT to accelerate vhost async dequeue data-path.

Signed-off-by: Wenwu Ma 
Reviewed-by: Maxime Coquelin 
Tested-by: Yvonne Yang 
---
 doc/guides/sample_app_ug/vhost.rst |   9 +-
 examples/vhost/ioat.c  |  61 +++--
 examples/vhost/ioat.h  |  25 
 examples/vhost/main.c  | 201 +++--
 examples/vhost/main.h  |   6 +-
 5 files changed, 219 insertions(+), 83 deletions(-)

diff --git a/doc/guides/sample_app_ug/vhost.rst 
b/doc/guides/sample_app_ug/vhost.rst
index 9afde9c7f5..63dcf181e1 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -169,9 +169,12 @@ demonstrates how to use the async vhost APIs. It's used in 
combination with dmas
 **--dmas**
 This parameter is used to specify the assigned DMA device of a vhost device.
 Async vhost-user net driver will be used if --dmas is set. For example
---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel 00:04.0 for vhost
-device 0 enqueue operation and use DMA channel 00:04.1 for vhost device 1
-enqueue operation.
+--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means use
+DMA channel 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue operation
+and use DMA channel 00:04.1/00:04.3 for vhost device 1 enqueue/dequeue
+operation. The index of the device corresponds to the socket file in order,
+that means vhost device 0 is created through the first socket file, vhost
+device 1 is created through the second socket file, and so on.
 
 Common Issues
 -
diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 6adc30b622..3a256b0f4c 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -21,6 +21,8 @@ struct packet_tracker {
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
 
+int vid2socketid[MAX_VHOST_DEVICE];
+
 int
 open_ioat(const char *value)
 {
@@ -29,7 +31,7 @@ open_ioat(const char *value)
char *addrs = input;
char *ptrs[2];
char *start, *end, *substr;
-   int64_t vid, vring_id;
+   int64_t socketid, vring_id;
struct rte_ioat_rawdev_config config;
struct rte_rawdev_info info = { .dev_private = &config };
char name[32];
@@ -60,6 +62,7 @@ open_ioat(const char *value)
goto out;
}
while (i < args_nr) {
+   bool is_txd;
char *arg_temp = dma_arg[i];
uint8_t sub_nr;
sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
@@ -68,27 +71,39 @@ open_ioat(const char *value)
goto out;
}
 
-   start = strstr(ptrs[0], "txd");
-   if (start == NULL) {
+   int async_flag;
+   char *txd, *rxd;
+   txd = strstr(ptrs[0], "txd");
+   rxd = strstr(ptrs[0], "rxd");
+   if (txd) {
+   is_txd = true;
+   start = txd;
+   async_flag = ASYNC_ENQUEUE_VHOST;
+   } else if (rxd) {
+   is_txd = false;
+   start = rxd;
+   async_flag = ASYNC_DEQUEUE_VHOST;
+   } else {
ret = -1;
goto out;
}
 
start += 3;
-   vid = strtol(start, &end, 0);
+   socketid = strtol(start, &end, 0);
if (end == start) {
ret = -1;
goto out;
}
 
-   vring_id = 0 + VIRTIO_RXQ;
+   vring_id = is_txd ? VIRTIO_RXQ : VIRTIO_TXQ;
+
if (rte_pci_addr_parse(ptrs[1],
-   &(dma_info + vid)->dmas[vring_id].addr) < 0) {
+   &(dma_info + socketid)->dmas[vring_id].addr) < 0) {
ret = -1;
goto out;
}
 
-   rte_pci_device_name(&(dma_info + vid)->dmas[vring_id].addr,
+   rte_pci_device_name(&(dma_info + socketid)->dmas[vring_id].addr,
name, sizeof(name));
dev_id = rte_rawdev_get_dev_id(name);
if (dev_id == (uint16_t)(-ENODEV) ||
@@ -103,8 +118,9 @@ open_ioat(const char *value)
goto out;
}
 
-   (dma_info + vid)->dmas[vring_id].dev_id = dev_id;
-   (dma_info + vid)->dmas[vring_id].is_valid = true;
+   (dma_info + socketid)->dmas[vring_id].dev_id = dev_id;
+   (dma_info + socketid)->dmas[vring_id].is_valid = true;
+   (dma_info + socketid)->async_flag |= async_flag;
config.ring_size = IOAT_RING_SIZE;
config.hdls_disable = true;
if (rte_rawdev_configure(dev_id, &info, sizeof(config)) < 0) {
@@ -126,13 +142,16 @@ io

Re: [dpdk-dev] [PATCH v2 02/15] crypto: add total raw buffer length

2021-09-28 Thread Akhil Goyal
> Hi Akhil,
> 
> > > > On 9/21/2021 12:58 AM, Akhil Goyal wrote:
> > > > >>> From: Gagandeep Singh 
> > > > >>>
> > > > >>> The current crypto raw data vectors is extended to support
> > > > >>> rte_security usecases, where we need total data length to know
> > > > >>> how much additional memory space is available in buffer other
> > > > >>> than data length so that driver/HW can write expanded size
> > > > >>> data after encryption.
> > > > >>>
> > > > >>> Signed-off-by: Gagandeep Singh 
> > > > >>> Acked-by: Akhil Goyal 
> > > > >>> ---
> > > > >>>   lib/cryptodev/rte_crypto_sym.h | 6 ++
> > > > >>>   1 file changed, 6 insertions(+)
> > > > >>>
> > > > >>> diff --git a/lib/cryptodev/rte_crypto_sym.h
> > > > >> b/lib/cryptodev/rte_crypto_sym.h
> > > > >>> index dcc0bd5933..e5cef1fb72 100644
> > > > >>> --- a/lib/cryptodev/rte_crypto_sym.h
> > > > >>> +++ b/lib/cryptodev/rte_crypto_sym.h
> > > > >>> @@ -37,6 +37,8 @@ struct rte_crypto_vec {
> > > > >>> rte_iova_t iova;
> > > > >>> /** length of the data buffer */
> > > > >>> uint32_t len;
> > > > >>> +   /** total buffer length*/
> > > > >>> +   uint32_t tot_len;
> > > > >>>   };
> > > > >>>
> > > > >>>   /**
> > > > >>> @@ -980,12 +982,14 @@ rte_crypto_mbuf_to_vec(const struct
> > > rte_mbuf
> > > > >> *mb, uint32_t ofs, uint32_t len,
> > > > >>> seglen = mb->data_len - ofs;
> > > > >>> if (len <= seglen) {
> > > > >>> vec[0].len = len;
> > > > >>> +   vec[0].tot_len = mb->buf_len;
> > > > >> That doesn't look right.
> > > > >> We should take into a count mbuf headroom and input offset.
> > > > >> Something like:
> > > > >> vec[0].tot_len = mb->buf_len - rte_pktmbuf_headroom(m) - ofs;
> > > > >> Same in other places below.
> > > > >>
> > > > > I believe the packet can expand into headroom based on the protocol
> > > support.
> > > > Yes, total length is representing the total buffer length available. The
> > > > security protocol shall take care of the headroom and offsets.
> > >
> > > Hmm, and how it will now how many bytes are in head-room, and how
> > > many are in tail-room?
> > > We either need to provide values for both, or assume that only tail-room
> is
> > > available for the driver.
> > I believe it should be starting point where output can be written till the 
> > end
> of buffer.
> 
> Right, that's:
> base = rte_pktmbuf_mtod_offset(mb, void *, ofs);
> 
> > There should not be any headroom and tailroom for raw buffers.
> 
> I am not talking about raw buffers, what I am saying that some space in the
> mbuf
> might be already occupied, that's why we have data_off inside rte_mbuf, etc.
> 
> > It should be mbuf->buf_len - ofs.
> 
> No, it should be:
> len = mb->buf_len - rte_pktmbuf_headroom(m) - ofs;
> Otherwise PMD can overwrite memory beyond its buf_len.
> 
@Hemant: Do you agree. Please send next version.


Re: [dpdk-dev] [EXT] [PATCH v4 02/10] security: add UDP params for IPsec NAT-T

2021-09-28 Thread Akhil Goyal
The RFC states that NAT-T should use port 4500, but it does not specify a
particular port for UDP encapsulation in general.
Hence the API should remain generic here.

From: Nicolau, Radu 
Sent: Monday, September 27, 2021 2:47 PM
To: hemant.agra...@nxp.com; Akhil Goyal ; Declan Doherty 

Cc: dev@dpdk.org; m...@ashroe.eu; konstantin.anan...@intel.com; 
vladimir.medved...@intel.com; bruce.richard...@intel.com; 
roy.fan.zh...@intel.com; Anoob Joseph ; 
abhijit.si...@intel.com; daniel.m.buck...@intel.com; Archana Muniganti 
; Tejasree Kondoj ; 
ma...@nvidia.com
Subject: Re: [dpdk-dev] [EXT] [PATCH v4 02/10] security: add UDP params for 
IPsec NAT-T



On 9/24/2021 10:11 AM, Hemant Agrawal wrote:


On 9/6/2021 4:39 PM, Nicolau, Radu wrote:

On 9/5/2021 3:19 PM, Akhil Goyal wrote:

Hi Radu,


Add support for specifying UDP port params for UDP encapsulation option.

Signed-off-by: Declan Doherty 

Signed-off-by: Radu Nicolau 

Signed-off-by: Abhijit Sinha 

Signed-off-by: Daniel Martin Buckley 

Do we really need to specify the port numbers for NAT-T?
I suppose they are fixed as 4500.
Could you please specify what the user need to set here for session
creation?

From what I'm seeing here 
https://datatracker.ietf.org/doc/html/rfc3948#section-2.1
 there is no requirement in general for UDP encapsulation so I think it's 
better to make the API flexible as to allow any port to be used.



This section states that :

o  the Source Port and Destination Port MUST be the same as that used by IKE 
traffic,



IKE usages port 4500



am I missing something?



I think there's enough confusion in the RFCs so I think it's better to keep 
this option flexible:

For example 
https://datatracker.ietf.org/doc/html/rfc5996#section-2.23:

   It is a common practice of NATs to translate TCP and UDP port numbers

   as well as addresses and use the port numbers of inbound packets to

   decide which internal node should get a given packet.  For this

   reason, even though IKE packets MUST be sent to and from UDP port 500

   or 4500, they MUST be accepted coming from any port and responses

   MUST be sent to the port from whence they came.  This is because the

   ports may be modified as the packets pass through NATs.  Similarly,

   IP addresses of the IKE endpoints are generally not included in the

   IKE payloads because the payloads are cryptographically protected and

   could not be transparently modified by NATs.


Re: [dpdk-dev] [RFC V2] ethdev: fix issue that dev close in PMD calls twice

2021-09-28 Thread Singh, Aman Deep



On 9/22/2021 9:01 AM, Huisong Li wrote:


在 2021/9/20 22:07, Ferruh Yigit 写道:

On 8/25/2021 10:53 AM, Huisong Li wrote:

在 2021/8/24 22:42, Ferruh Yigit 写道:

On 8/19/2021 4:45 AM, Huisong Li wrote:

在 2021/8/18 19:24, Ferruh Yigit 写道:

On 8/13/2021 9:16 AM, Huisong Li wrote:

在 2021/8/13 14:12, Thomas Monjalon 写道:

13/08/2021 04:11, Huisong Li:

Hi, all

This patch can enhance the security of device uninstallation to
eliminate dependency on user usage methods.

Can you check this patch?


在 2021/8/3 10:30, Huisong Li 写道:
Ethernet devices in DPDK can be released by 
rte_eth_dev_close() and
rte_dev_remove(). These APIs both call xxx_dev_close() in PMD 
layer
to uninstall hardware. However, the two APIs do not have 
explicit
invocation restrictions. In other words, at the ethdev layer, 
it is
possible to call rte_eth_dev_close() before calling 
rte_dev_remove()

or rte_eal_hotplug_remove(). In such a bad scenario,

It is not a bad scenario.
If there is no more port for the device after calling close,
the device should be removed automatically.
Keep in mind "close" is for one port, "remove" is for the 
entire device

which can have more than one port.

I know.

dev_close() is for removing an eth device. And rte_dev_remove() 
can be used


for removing the rte device and all its eth devices belonging to 
the rte

device.

In rte_dev_remove(), "remove" is executed in primary or one of 
secondary,


all eth devices having same pci address will be closed and removed.


the primary
process may be fine, but it may cause that xxx_dev_close() in 
the PMD
layer will be called twice in the secondary process. So this 
patch

fixes it.
If a port is closed in primary, it should be the same in 
secondary.




+    /*
+ * The eth_dev->data->name doesn't be cleared by the 
secondary

process,
+ * so above "eth_dev" isn't NULL after 
rte_eth_dev_close() called.

This assumption is not clear. All should be closed together.
However, dev_close() does not have the feature similar to 
rte_dev_remove().


Namely, it is not guaranteed that all eth devices are closed 
together in

ethdev
layer. It depends on app or user.

If the app does not close together, the operation of repeatedly
uninstalling an
eth device in the secondary process

will be triggered when dev_close() is first called by one secondary
process, and
then rte_dev_remove() is called.

So I think it should be avoided.

First of all, I am not sure about calling 'rte_eth_dev_close()' or
'rte_dev_remove()' from the secondary process.
There are explicit checks in various locations to prevent 
clearing resources

completely from secondary process.

There's no denying that.

Generally, hardware resources of eth device and shared data of the 
primary and

secondary process

are cleared by primary, which are controled by ethdev layer or PMD 
layer.


But there may be some private data or resources of each process 
(primary or

secondary ), such as mp action

registered by rte_mp_action_register() or others.  For these 
resources, the

secondary process still needs to clear.

Namely, both primary and secondary processes need to prevent 
repeated offloading

of resources.

Calling 'rte_eth_dev_close()' or 'rte_dev_remove()' by secondary 
is technically
can be done but application needs to be extra cautious and should 
take extra

measures and synchronization to make it work.
Regular use-case is secondary processes do the packet processing 
and all

control
commands run by primary.

You are right. We have a consensus that 'rte_eth_dev_close()' or
'rte_dev_remove()'

can be called by primary and secondary processes.

But DPDK framework cannot assume user behavior.😁

We need to make it more secure and reliable for both primary and 
secondary

processes.

In primary, if you call 'rte_eth_dev_close()' it will clear all 
ethdev

resources
and further 'rte_dev_remove()' call will detect missing ethdev 
resources and

won't try to clear them again.

In secondary, if you call 'rte_eth_dev_close()', it WON'T clear 
all resources
and further 'rte_dev_remove()' call (either from primary or 
secondary) will try
to clean ethdev resources again. You are trying to prevent this 
retry in remove

happening for secondary process.
Right. However, if secondary process in PMD layer has its own 
private resources

to be

cleared, it still need to do it by calling 'rte_eth_dev_close()' or
'rte_dev_remove()'.

In secondary it won't free ethdev resources anyway if you let it 
continue,

but I
guess here you are trying to prevent the PMD dev_close() called 
again. Why? Is
it just for optimization or does it cause unexpected behavior in 
the PMD?



Overall, to free resources you need to do the 
'rte_eth_dev_close()' or
'rte_dev_remove()' in the primary anyway. So instead of this 
workaround, I

would
suggest making PMD dev_close() safe to be called multiple times 
(if this is the

problem.)
In conclusion,  primary and secondary processes in PMD layer may 
have their own


private data and resourc

Re: [dpdk-dev] [PATCH] net/ixgbe: fix RxQ/TxQ release

2021-09-28 Thread Julien Meunier

Hello,

On 28/09/2021 05:21, Wang, Haiyue wrote:

-Original Message-
From: Wang, Haiyue
Sent: Tuesday, September 28, 2021 11:06
To: 'Julien Meunier' ; dev@dpdk.org
Cc: sta...@dpdk.org; Richardson, Bruce 
Subject: RE: [PATCH] net/ixgbe: fix RxQ/TxQ release


-Original Message-
From: Julien Meunier 
Sent: Tuesday, September 28, 2021 01:18
To: dev@dpdk.org
Cc: sta...@dpdk.org; Richardson, Bruce ; Wang, 
Haiyue

Subject: [PATCH] net/ixgbe: fix RxQ/TxQ release

On the vector implementation, during the tear-down, the mbufs not
drained in the RxQ and TxQ are freed based on an algorithm which
assumed that the number of descriptors is a power of 2 (max_desc).
Based on this assumption, this algorithm uses a bitmask in order to
detect an index overflow during the iteration, and to restart the loop
from 0.

However, there is no such power of 2 requirement in the ixgbe for the
number of descriptors in the RxQ / TxQ. The only requirement is to have
a number correctly aligned.

If a user requested to configure a number of descriptors which is not a
power of 2, as a consequence, during the tear-down, it was possible to
be in an infinite loop, and to never reach the exit loop condition.



Are you able to setup not a power of 2 successfully ?



My fault, yes, possible. ;-)



Yes, we have some usecases where the nb of descriptiors for the TxQ is 
set to 1536.
I modified the test_pmd_perf in order to validate this behavior, as my 
ixgbe X550 supports the loopback mode:

- nb_desc = 2048 => txq is drained and stopped correctly
- nb_desc = 1536 => freeze during the teardown


int
rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
   uint16_t nb_tx_desc, unsigned int socket_id,
   const struct rte_eth_txconf *tx_conf)
{
...

if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
RTE_ETHDEV_LOG(ERR,
"Invalid value for nb_tx_desc(=%hu), should be: <= %hu, 
>= %hu, and a product
of %hu\n",
nb_tx_desc, dev_info.tx_desc_lim.nb_max,
dev_info.tx_desc_lim.nb_min,
dev_info.tx_desc_lim.nb_align);
return -EINVAL;
}

...

}


By removing the bitmask and changing the loop method, we can avoid this
issue, and allow the user to configure a RxQ / TxQ which is not a power
of 2.

Fixes: c95584dc2b18 ("ixgbe: new vectorized functions for Rx/Tx")
Cc: bruce.richard...@intel.com
Cc: sta...@dpdk.org

Signed-off-by: Julien Meunier 
---
  drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 20 +---
  1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
index adba855ca3..8912558918 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
@@ -150,11 +150,14 @@ _ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue 
*txq)
return;



Just one line ?

i = (i + 1) % txq->nb_tx_desc



Ah yes, I was too focused with this bitmask...
The shorter, the better.

I will send a V2 today.

Thanks for this feedback !


/* release the used mbufs in sw_ring */
-   for (i = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
-i != txq->tx_tail;
-i = (i + 1) & max_desc) {
+   i = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
+   while (i != txq->tx_tail) {
txe = &txq->sw_ring_v[i];
rte_pktmbuf_free_seg(txe->mbuf);
+
+   i = i + 1;
+   if (i > max_desc)
+   i = 0;
}
txq->nb_tx_free = max_desc;

@@ -168,7 +171,7 @@ _ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue 
*txq)
  static inline void
  _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
  {
-   const unsigned int mask = rxq->nb_rx_desc - 1;
+   const unsigned int max_desc = rxq->nb_rx_desc - 1;
unsigned int i;

if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc)
@@ -181,11 +184,14 @@ _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue 
*rxq)
rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
}
} else {
-   for (i = rxq->rx_tail;
-i != rxq->rxrearm_start;
-i = (i + 1) & mask) {
+   i = rxq->rx_tail;
+   while (i != rxq->rxrearm_start) {
if (rxq->sw_ring[i].mbuf != NULL)
rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+
+   i = i + 1;
+   if (i > max_desc)
+   i = 0;
}
}

--
2.17.1




--
Julien Meunier


Re: [dpdk-dev] [PATCH v5 0/5] Add lookaside IPsec tests

2021-09-28 Thread Akhil Goyal
> Add lookaside IPsec functional tests. Known vector tests and
> combined mode framework is added.
> 
> Known vectors are outbound vectors based on
> https://datatracker.ietf.org/doc/html/draft-mcgrew-gcm-test-01
> 
> The vectors are updated to have sequence number as 1 & L4 checksum
> computed correctly. And they have following properties,
> 1. ESP
> 2. Tunnel mode
> 3. IPv4
> 4. IPv4 tunnel
> 
> Known vector tests for inbound operation would generate test vectors by
> reversing outbound known vectors. The input_text would become encrypted
> packet and output_text would be the plain packet. Tests would then validate
> the operation by comparing against plain packet.
> 
> Combined mode tests are used to test all IPsec features against all ciphers
> supported by the PMD. The framework is introduced to avoid testing
> with any specific algo, thereby making it mandatory to be supported. Also,
> testing with all supported combinations will help with increasing coverage
> as well.
> 
> Four test cases use combined mode,
> 1. Display algo coverage and basic in + out tests
> 2. Negative test for ICV corruption
> 3. IV generation
> 4. UDP encapsulation
> 
> IV generation test case compares IV generated for a batch of packets and
> returns
> failure if IV is repeated.
> 
> Upcoming additions,
> 1. AES-CBC-SHA1-HMAC known vectors & combined mode
> 2. IPv6
> 3. Transport
> 4. Mixed mode (IPv4-in-IPv6 etc, all combinations)
> 
> Tested with following PMDs
> 1. crypto_octeontx2
> 2. crypto_cn10k
> 3. crypto_cn9k
> 
> Changes in v5:
> - Fixed lack of return check with dev_configure_and_start() (comment from
> Akhil)
> - Adjusted patch title and description (comment from Akhil)
> 
Applied to dpdk-next-crypto

Thanks.


[dpdk-dev] [PATCH] net/mlx5: fix flow tables double release

2021-09-28 Thread Bing Zhao
In the function mlx5_alloc_shared_dr(), there are various reasons
to result in a failure and error clean up process. While in the
caller of mlx5_dev_spawn(), once there is an error occurring after
the mlx5_alloc_shared_dr(), the mlx5_os_free_shared_dr() is called
to release all the resources.

To prevent a double release, the pointers of the resources should
be checked before the releasing and set to NULL after done.

In mlx5_free_table_hash_list(), after the release, the pointer
was not set to NULL, so a double release may cause a crash.

By setting the tables pointer to NULL as done for other resources,
the double release and crash could be solved.

Fixes: 54534725d2f3 ("net/mlx5: fix flow table hash list conversion")
Cc: ma...@mellanox.com
Cc: sta...@dpdk.org

Signed-off-by: Bing Zhao 
---
 drivers/net/mlx5/mlx5.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 02ea2e781e..3c10f186e6 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1369,6 +1369,7 @@ mlx5_free_table_hash_list(struct mlx5_priv *priv)
if (!sh->flow_tbls)
return;
mlx5_hlist_destroy(sh->flow_tbls);
+   sh->flow_tbls = NULL;
 }
 
 /**
-- 
2.27.0



[dpdk-dev] [Bug 816] KNI deadlocks while processing mac address set request with linux kernel version >= v5.12

2021-09-28 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=816

Bug ID: 816
   Summary: KNI deadlocks while processing mac address set request
with linux kernel version >= v5.12
   Product: DPDK
   Version: 21.08
  Hardware: x86
OS: Linux
Status: UNCONFIRMED
  Severity: critical
  Priority: Normal
 Component: ethdev
  Assignee: dev@dpdk.org
  Reporter: sahithi.sin...@oracle.com
  Target Milestone: ---

Starting from linux kernel version 5.12, a new global semaphore(dev_addr_sem)
was introduced in dev_set_mac_address_user() function that should be acquired
and released along with rtnl_lock when a mac address set request was received
from userspace. 

This introduced following locking sequence in linux kernel , 
1. As part of dev_ioctl(), take rtnl_lock first
2. Then call down_write(&dev_addr_sem) in dev_set_mac_address_user()
3. Finally call kni_net_set_mac which calls kni_net_process_request.
4. In kni_net_process_request we will release only rtnl_lock but not
dev_addr_sem before we enqueue the request to req_q(i.e to userspace dpdk
process)
5. After receiving a response or timeout , we will again try to hold rtnl_lock 

Above sequence in KNI is resulting in deadlock as we are just releasing
rtnl_lock without releasing semaphore while some other device could be waiting
for dev_addr_sem while holding rtnl_lock. 
For example, if user issues two mac address set requests immediately on two kni
interfaces intf1, intf2. Then
  1.intf1 takes rtnl_lock
  2.intf1 takes dev_addr_sem
  3.intf2 waits for rtnl_lock
  4.intf1 in KNI , releases rtnl_lock
  5.intf2 takes rtnl_lock and waits for dev_addr_sem held by intf1
  6.intf1 at the end of kni request handling code, will try to lock rtnl_lock
held by intf2.
But intf2 will not release rtnl_lock as it was waiting for dev_addr_sem
held by intf1.

So at the end,  intf1 will be holding dev_addr_sem and will be waiting for
rtnl_lock. intf2 will be holding rtnl_lock and will be waiting for dev_addr_sem
resulting in the KERNEL deadlock.


This issue started due to changes from 
commitid:631217c761353aa5e4e548a20e570245ecbc8eda  (kni: fix kernel deadlock
with bifurcated device) 
and with linux kernel version >= v5.12

-- 
You are receiving this mail because:
You are the assignee for the bug.

[dpdk-dev] [PATCH v2] net/ixgbe: fix RxQ/TxQ release

2021-09-28 Thread Julien Meunier
On the vector implementation, during the tear-down, the mbufs not
drained in the RxQ and TxQ are freed based on an algorithm which
assumes that the number of descriptors is a power of 2 (max_desc).
Based on this hypothesis, this algorithm uses a bitmask in order to
detect an index overflow during the iteration, and to restart the loop
from 0.

However, there is no such power of 2 requirement in the ixgbe for the
number of descriptors in the RxQ / TxQ. The only requirement is to have
a number correctly aligned.

If a user requested to configure a number of descriptors which is not a
power of 2, as a consequence, during the tear-down, it was possible to
be in an infinite loop, and to never reach the exit loop condition.

By removing the bitmask and changing the loop method, we can avoid this
issue, and allow the user to configure a RxQ / TxQ which is not a power
of 2.

Fixes: c95584dc2b18 ("ixgbe: new vectorized functions for Rx/Tx")
Cc: bruce.richard...@intel.com
Cc: sta...@dpdk.org

Signed-off-by: Julien Meunier 
---
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
index adba855ca3..005e60668a 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
@@ -152,7 +152,7 @@ _ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue 
*txq)
/* release the used mbufs in sw_ring */
for (i = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
 i != txq->tx_tail;
-i = (i + 1) & max_desc) {
+i = (i + 1) % txq->nb_tx_desc) {
txe = &txq->sw_ring_v[i];
rte_pktmbuf_free_seg(txe->mbuf);
}
@@ -168,7 +168,6 @@ _ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue 
*txq)
 static inline void
 _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
 {
-   const unsigned int mask = rxq->nb_rx_desc - 1;
unsigned int i;

if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc)
@@ -183,7 +182,7 @@ _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue 
*rxq)
} else {
for (i = rxq->rx_tail;
 i != rxq->rxrearm_start;
-i = (i + 1) & mask) {
+i = (i + 1) % rxq->nb_rx_desc) {
if (rxq->sw_ring[i].mbuf != NULL)
rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
}
--
2.17.1



Re: [dpdk-dev] [PATCH v2 02/15] crypto: add total raw buffer length

2021-09-28 Thread Hemant Agrawal
HI Akhil/Konstantin
> > Hi Akhil,
> >
> > > > > On 9/21/2021 12:58 AM, Akhil Goyal wrote:
> > > > > >>> From: Gagandeep Singh 
> > > > > >>>
> > > > > >>> The current crypto raw data vectors is extended to support
> > > > > >>> rte_security usecases, where we need total data length to
> > > > > >>> know how much additional memory space is available in buffer
> > > > > >>> other than data length so that driver/HW can write expanded
> > > > > >>> size data after encryption.
> > > > > >>>
> > > > > >>> Signed-off-by: Gagandeep Singh 
> > > > > >>> Acked-by: Akhil Goyal 
> > > > > >>> ---
> > > > > >>>   lib/cryptodev/rte_crypto_sym.h | 6 ++
> > > > > >>>   1 file changed, 6 insertions(+)
> > > > > >>>
> > > > > >>> diff --git a/lib/cryptodev/rte_crypto_sym.h
> > > > > >> b/lib/cryptodev/rte_crypto_sym.h
> > > > > >>> index dcc0bd5933..e5cef1fb72 100644
> > > > > >>> --- a/lib/cryptodev/rte_crypto_sym.h
> > > > > >>> +++ b/lib/cryptodev/rte_crypto_sym.h
> > > > > >>> @@ -37,6 +37,8 @@ struct rte_crypto_vec {
> > > > > >>>   rte_iova_t iova;
> > > > > >>>   /** length of the data buffer */
> > > > > >>>   uint32_t len;
> > > > > >>> + /** total buffer length*/
> > > > > >>> + uint32_t tot_len;
> > > > > >>>   };
> > > > > >>>
> > > > > >>>   /**
> > > > > >>> @@ -980,12 +982,14 @@ rte_crypto_mbuf_to_vec(const struct
> > > > rte_mbuf
> > > > > >> *mb, uint32_t ofs, uint32_t len,
> > > > > >>>   seglen = mb->data_len - ofs;
> > > > > >>>   if (len <= seglen) {
> > > > > >>>   vec[0].len = len;
> > > > > >>> + vec[0].tot_len = mb->buf_len;
> > > > > >> That doesn't look right.
> > > > > >> We should take into a count mbuf headroom and input offset.
> > > > > >> Something like:
> > > > > >> vec[0].tot_len = mb->buf_len - rte_pktmbuf_headroom(m) - ofs;
> > > > > >> Same in other places below.
> > > > > >>
> > > > > > I believe the packet can expand into headroom based on the
> > > > > > protocol
> > > > support.
> > > > > Yes, total length is representing the total buffer length
> > > > > available. The security protocol shall take care of the headroom and
> offsets.
> > > >
> > > > Hmm, and how it will now how many bytes are in head-room, and how
> > > > many are in tail-room?
> > > > We either need to provide values for both, or assume that only
> > > > tail-room
> > is
> > > > available for the driver.
> > > I believe it should be starting point where output can be written
> > > till the end
> > of buffer.
> >
> > Right, that's:
> > base = rte_pktmbuf_mtod_offset(mb, void *, ofs);
> >
> > > There should not be any headroom and tailroom for raw buffers.
> >
> > I am not talking about raw buffers, what I am saying that some space
> > in the mbuf might be already occupied, that's why we have data_off
> > inside rte_mbuf, etc.
> >
> > > It should be mbuf->buf_len - ofs.
> >
> > No, it should be:
> > len = mb->buf_len - rte_pktmbuf_headroom(m) - ofs; Otherwise PMD can
> > overwrite memory beyond its buf_len.
> >
> @Hemant: Do you agree. Please send next version.
[Hemant] 
[Hemant] Yes, I will send the new version


Re: [dpdk-dev] [EXT] Re: [PATCH v6 2/3] security: add option for faster udata or mdata access

2021-09-28 Thread Akhil Goyal
> > --- a/doc/guides/rel_notes/release_21_08.rst
> > +++ b/doc/guides/rel_notes/release_21_08.rst
> > @@ -223,6 +223,12 @@ ABI Changes
> >
> >  * No ABI change that would break compatibility with 20.11.
> >
> > +* security: ``rte_security_set_pkt_metadata`` and
> ``rte_security_get_userdata``
> > +  routines used by Inline outbound and Inline inbound security processing
> are
> > +  made inline and enhanced to do simple 64-bit set/get for PMD's that do
> not
> > +  have much processing in PMD specific callbacks but just 64-bit set/get.
> > +  This avoids a per pkt function pointer jump overhead for such PMD's.
> 
> Please pay attention it is not the right release notes.
> 
Fixed... My bad.


Re: [dpdk-dev] [dpdk-stable] [PATCH v4] mbuf: fix reset on mbuf free

2021-09-28 Thread Thomas Monjalon
Follow-up again:
We have added a note in 21.08, we should fix it in 21.11.
If there are no counter proposal, I suggest applying this patch,
no matter the performance regression.


30/07/2021 16:54, Thomas Monjalon:
> 30/07/2021 16:35, Morten Brørup:
> > > From: Olivier Matz [mailto:olivier.m...@6wind.com]
> > > Sent: Friday, 30 July 2021 14.37
> > > 
> > > Hi Thomas,
> > > 
> > > On Sat, Jul 24, 2021 at 10:47:34AM +0200, Thomas Monjalon wrote:
> > > > What's the follow-up for this patch?
> > > 
> > > Unfortunatly, I still don't have the time to work on this topic yet.
> > > 
> > > In my initial tests, in our lab, I didn't notice any performance
> > > regression, but Ali has seen an impact (0.5M PPS, but I don't know how
> > > much in percent).
> > > 
> > > 
> > > > 19/01/2021 15:04, Slava Ovsiienko:
> > > > > Hi, All
> > > > >
> > > > > Could we postpose this patch at least to rc2? We would like to
> > > conduct more investigations?
> > > > >
> > > > > With best regards, Slava
> > > > >
> > > > > From: Olivier Matz 
> > > > > > On Mon, Jan 18, 2021 at 05:52:32PM +, Ali Alnubani wrote:
> > > > > > > Hi,
> > > > > > > (Sorry had to resend this to some recipients due to mail server
> > > problems).
> > > > > > >
> > > > > > > Just confirming that I can still reproduce the regression with
> > > single core and
> > > > > > 64B frames on other servers.
> > > > > >
> > > > > > Many thanks for the feedback. Can you please detail what is the
> > > amount of
> > > > > > performance loss in percent, and confirm the test case? (I
> > > suppose it is
> > > > > > testpmd io forward).
> > > > > >
> > > > > > Unfortunatly, I won't be able to spend a lot of time on this soon
> > > (sorry for
> > > > > > that). So I see at least these 2 options:
> > > > > >
> > > > > > - postpone the patch again, until I can find more time to analyze
> > > > > >   and optimize
> > > > > > - apply the patch if the performance loss is acceptable compared
> > > to
> > > > > >   the added value of fixing a bug
> > > > > >
> > > > [...]
> > > 
> > > Statu quo...
> > > 
> > > Olivier
> > > 
> > 
> > The decision should be simple:
> > 
> > Does the DPDK project support segmented packets?
> > If yes, then apply the patch to fix the bug!
> > 
> > If anyone seriously cares about the regression it introduces, optimization 
> > patches are welcome later. We shouldn't wait for it.
> 
> You're right, but the regression is flagged to a 4-years old patch,
> that's why I don't consider it as urgent.
> 
> > If the patch is not applied, the documentation must be updated to mention 
> > that we are releasing DPDK with a known bug: that segmented packets are 
> > handled incorrectly in the scenario described in this patch.
> 
> Yes, would be good to document the known issue,
> no matter how old it is.
> 
> > Generally, there could be some performance to gain by not supporting 
> > segmented packets at all, as a compile time option. But that is a different 
> > discussion.






[dpdk-dev] [PATCH v7 0/9] baseband: add NXP LA12xx driver

2021-09-28 Thread nipun . gupta
From: Nipun Gupta 

This series introduces the BBDEV LA12xx poll mode driver (PMD) to support
an implementation for offloading High Phy processing functions like
LDPC Encode / Decode 5GNR wireless acceleration function, using PCI based
LA12xx Software defined radio.

Please check the documentation patch for more info.

The driver currently implements basic feature to offload only the 5G LDPC
encode/decode.

A new capability has been added to check if the driver can support the
input data in network byte order. Two test vectors are also added as an
example with input data in network byte.

v2: add test case changes
v3: fix 32 bit compilation
v4: capability for network byte order, doc patch merged inline.
v5: add llr_size and llr_decimals, removed LLR compression flag,
update testbbdev to handle endianness, rebased on top of 20.08
v6: added BE as device info instead of capability, updated test
to have 2 codeblocks
v7: fixed checkpatch errors

Hemant Agrawal (6):
  baseband: introduce NXP LA12xx driver
  baseband/la12xx: add devargs for max queues
  baseband/la12xx: add support for multiple modems
  baseband/la12xx: add queue and modem config support
  baseband/la12xx: add enqueue and dequeue support
  app/bbdev: enable la12xx for bbdev

Nipun Gupta (3):
  bbdev: add big endian data processing info
  app/bbdev: handle endianness of test data
  app/bbdev: add test vectors for transport blocks

 MAINTAINERS   |   10 +
 app/test-bbdev/meson.build|3 +
 app/test-bbdev/test_bbdev_perf.c  |   62 +
 app/test-bbdev/test_vectors/ldpc_dec_tb.data  |  265 
 app/test-bbdev/test_vectors/ldpc_enc_tb.data  |   95 ++
 doc/guides/bbdevs/features/la12xx.ini |   13 +
 doc/guides/bbdevs/index.rst   |1 +
 doc/guides/bbdevs/la12xx.rst  |  126 ++
 doc/guides/rel_notes/release_21_11.rst|5 +
 drivers/baseband/la12xx/bbdev_la12xx.c| 1100 +
 drivers/baseband/la12xx/bbdev_la12xx.h|   51 +
 drivers/baseband/la12xx/bbdev_la12xx_ipc.h|  244 
 .../baseband/la12xx/bbdev_la12xx_pmd_logs.h   |   28 +
 drivers/baseband/la12xx/meson.build   |6 +
 drivers/baseband/la12xx/version.map   |3 +
 drivers/baseband/meson.build  |1 +
 lib/bbdev/rte_bbdev.h |2 +
 17 files changed, 2015 insertions(+)
 create mode 100644 app/test-bbdev/test_vectors/ldpc_dec_tb.data
 create mode 100644 app/test-bbdev/test_vectors/ldpc_enc_tb.data
 create mode 100644 doc/guides/bbdevs/features/la12xx.ini
 create mode 100644 doc/guides/bbdevs/la12xx.rst
 create mode 100644 drivers/baseband/la12xx/bbdev_la12xx.c
 create mode 100644 drivers/baseband/la12xx/bbdev_la12xx.h
 create mode 100644 drivers/baseband/la12xx/bbdev_la12xx_ipc.h
 create mode 100644 drivers/baseband/la12xx/bbdev_la12xx_pmd_logs.h
 create mode 100644 drivers/baseband/la12xx/meson.build
 create mode 100644 drivers/baseband/la12xx/version.map

-- 
2.17.1



[dpdk-dev] [PATCH v7 1/9] bbdev: add big endian data processing info

2021-09-28 Thread nipun . gupta
From: Nipun Gupta 

This patch introduces a new info field pertaining to the bbdev device
to process the data in big endian order.

Signed-off-by: Nipun Gupta 
---
 lib/bbdev/rte_bbdev.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
index 7017124414..3acc008d06 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -309,6 +309,8 @@ struct rte_bbdev_driver_info {
uint16_t min_alignment;
/** HARQ memory available in kB */
uint32_t harq_buffer_size;
+   /** Device support input, output and HARQ data as big-endian */
+   uint8_t support_be_data;
/** Default queue configuration used if none is supplied  */
struct rte_bbdev_queue_conf default_queue_conf;
/** Device operation capabilities */
-- 
2.17.1



Re: [dpdk-dev] [PATCH v5 5/5] devargs: parse global device syntax

2021-09-28 Thread David Marchand
On Tue, Apr 13, 2021 at 5:15 AM Xueming Li  wrote:
>
> When parsing a devargs, try to parse using the global device syntax
> first. Fallback on legacy syntax on error.
>
> Example of new global device syntax:
>  -a bus=pci,addr=82:00.0/class=eth/driver=mlx5,dv_flow_en=1
>
> Signed-off-by: Xueming Li 
> Reviewed-by: Gaetan Rivet 

Starting with a virtio user port, I get a warning:

# dpdk-testpmd --vdev
net_virtio_user0,iface=test,path=/dev/vhost-net,queues=1
--log-level=pmd.net.virtio.*:debug -- -i
...
EAL: Unrecognized layer dev/vhost-net,queues=1
...

Am I doing something wrong?
If not, could you have a look?


Thanks.

-- 
David Marchand



[dpdk-dev] [PATCH v7 2/9] baseband: introduce NXP LA12xx driver

2021-09-28 Thread nipun . gupta
From: Hemant Agrawal 

This patch introduces the baseband device driver for NXP's
LA1200 series software defined baseband modem.

Signed-off-by: Nipun Gupta 
Signed-off-by: Hemant Agrawal 
---
 MAINTAINERS   |   9 ++
 drivers/baseband/la12xx/bbdev_la12xx.c| 108 ++
 .../baseband/la12xx/bbdev_la12xx_pmd_logs.h   |  28 +
 drivers/baseband/la12xx/meson.build   |   6 +
 drivers/baseband/la12xx/version.map   |   3 +
 drivers/baseband/meson.build  |   1 +
 6 files changed, 155 insertions(+)
 create mode 100644 drivers/baseband/la12xx/bbdev_la12xx.c
 create mode 100644 drivers/baseband/la12xx/bbdev_la12xx_pmd_logs.h
 create mode 100644 drivers/baseband/la12xx/meson.build
 create mode 100644 drivers/baseband/la12xx/version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 30bf77b79a..e3f0e8759f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1289,6 +1289,15 @@ F: drivers/event/opdl/
 F: doc/guides/eventdevs/opdl.rst
 
 
+Baseband Drivers
+
+
+NXP LA12xx driver
+M: Nipun Gupta 
+M: Hemant Agrawal 
+F: drivers/baseband/la12xx/
+
+
 Rawdev Drivers
 --
 
diff --git a/drivers/baseband/la12xx/bbdev_la12xx.c 
b/drivers/baseband/la12xx/bbdev_la12xx.c
new file mode 100644
index 00..d3d7a4df37
--- /dev/null
+++ b/drivers/baseband/la12xx/bbdev_la12xx.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020-2021 NXP
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+
+#define DRIVER_NAME baseband_la12xx
+
+/* private data structure */
+struct bbdev_la12xx_private {
+   unsigned int max_nb_queues;  /**< Max number of queues */
+};
+/* Create device */
+static int
+la12xx_bbdev_create(struct rte_vdev_device *vdev)
+{
+   struct rte_bbdev *bbdev;
+   const char *name = rte_vdev_device_name(vdev);
+
+   PMD_INIT_FUNC_TRACE();
+
+   bbdev = rte_bbdev_allocate(name);
+   if (bbdev == NULL)
+   return -ENODEV;
+
+   bbdev->data->dev_private = rte_zmalloc(name,
+   sizeof(struct bbdev_la12xx_private),
+   RTE_CACHE_LINE_SIZE);
+   if (bbdev->data->dev_private == NULL) {
+   rte_bbdev_release(bbdev);
+   return -ENOMEM;
+   }
+
+   bbdev->dev_ops = NULL;
+   bbdev->device = &vdev->device;
+   bbdev->data->socket_id = 0;
+   bbdev->intr_handle = NULL;
+
+   /* register rx/tx burst functions for data path */
+   bbdev->dequeue_enc_ops = NULL;
+   bbdev->dequeue_dec_ops = NULL;
+   bbdev->enqueue_enc_ops = NULL;
+   bbdev->enqueue_dec_ops = NULL;
+
+   return 0;
+}
+
+/* Initialise device */
+static int
+la12xx_bbdev_probe(struct rte_vdev_device *vdev)
+{
+   const char *name;
+
+   PMD_INIT_FUNC_TRACE();
+
+   if (vdev == NULL)
+   return -EINVAL;
+
+   name = rte_vdev_device_name(vdev);
+   if (name == NULL)
+   return -EINVAL;
+
+   return la12xx_bbdev_create(vdev);
+}
+
+/* Uninitialise device */
+static int
+la12xx_bbdev_remove(struct rte_vdev_device *vdev)
+{
+   struct rte_bbdev *bbdev;
+   const char *name;
+
+   PMD_INIT_FUNC_TRACE();
+
+   if (vdev == NULL)
+   return -EINVAL;
+
+   name = rte_vdev_device_name(vdev);
+   if (name == NULL)
+   return -EINVAL;
+
+   bbdev = rte_bbdev_get_named_dev(name);
+   if (bbdev == NULL)
+   return -EINVAL;
+
+   rte_free(bbdev->data->dev_private);
+
+   return rte_bbdev_release(bbdev);
+}
+
+static struct rte_vdev_driver bbdev_la12xx_pmd_drv = {
+   .probe = la12xx_bbdev_probe,
+   .remove = la12xx_bbdev_remove
+};
+
+RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_la12xx_pmd_drv);
+RTE_LOG_REGISTER_DEFAULT(bbdev_la12xx_logtype, NOTICE);
diff --git a/drivers/baseband/la12xx/bbdev_la12xx_pmd_logs.h 
b/drivers/baseband/la12xx/bbdev_la12xx_pmd_logs.h
new file mode 100644
index 00..452435ccb9
--- /dev/null
+++ b/drivers/baseband/la12xx/bbdev_la12xx_pmd_logs.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 NXP
+ */
+
+#ifndef _BBDEV_LA12XX_PMD_LOGS_H_
+#define _BBDEV_LA12XX_PMD_LOGS_H_
+
+extern int bbdev_la12xx_logtype;
+
+#define rte_bbdev_log(level, fmt, ...) \
+   rte_log(RTE_LOG_ ## level, bbdev_la12xx_logtype, fmt "\n", \
+   ##__VA_ARGS__)
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+#define rte_bbdev_log_debug(fmt, ...) \
+   rte_bbdev_log(DEBUG, "la12xx_pmd: " fmt, \
+   ##__VA_ARGS__)
+#else
+#define rte_bbdev_log_debug(fmt, ...)
+#endif
+
+#define PMD_INIT_FUNC_TRACE() rte_bbdev_log_debug(">>")
+
+/* DP Logs, toggled out at compile time if level lower than current level */
+#define rte_bbdev_dp_log(level, fmt, args...) \
+   RTE_LOG_DP(level, PMD, fmt, ## args)
+
+#endif /* _BBDEV_LA12XX_PMD_LOGS_H_ */
diff --git a/drivers/baseband/l

[dpdk-dev] [PATCH v7 3/9] baseband/la12xx: add devargs for max queues

2021-09-28 Thread nipun . gupta
From: Hemant Agrawal 

This patch adds dev args to take the max number of queues as input.

Signed-off-by: Nipun Gupta 
Signed-off-by: Hemant Agrawal 
---
 drivers/baseband/la12xx/bbdev_la12xx.c | 73 +-
 1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/drivers/baseband/la12xx/bbdev_la12xx.c 
b/drivers/baseband/la12xx/bbdev_la12xx.c
index d3d7a4df37..ee03e93870 100644
--- a/drivers/baseband/la12xx/bbdev_la12xx.c
+++ b/drivers/baseband/la12xx/bbdev_la12xx.c
@@ -17,13 +17,73 @@
 
 #define DRIVER_NAME baseband_la12xx
 
+/*  Initialisation params structure that can be used by LA12xx BBDEV driver */
+struct bbdev_la12xx_params {
+   uint8_t queues_num; /*< LA12xx BBDEV queues number */
+};
+
+#define LA12XX_MAX_NB_QUEUES_ARG   "max_nb_queues"
+
+static const char * const bbdev_la12xx_valid_params[] = {
+   LA12XX_MAX_NB_QUEUES_ARG,
+};
+
 /* private data structure */
 struct bbdev_la12xx_private {
unsigned int max_nb_queues;  /**< Max number of queues */
 };
+static inline int
+parse_u16_arg(const char *key, const char *value, void *extra_args)
+{
+   uint16_t *u16 = extra_args;
+
+   unsigned int long result;
+   if ((value == NULL) || (extra_args == NULL))
+   return -EINVAL;
+   errno = 0;
+   result = strtoul(value, NULL, 0);
+   if ((result >= (1 << 16)) || (errno != 0)) {
+   rte_bbdev_log(ERR, "Invalid value %" PRIu64 " for %s",
+ result, key);
+   return -ERANGE;
+   }
+   *u16 = (uint16_t)result;
+   return 0;
+}
+
+/* Parse parameters used to create device */
+static int
+parse_bbdev_la12xx_params(struct bbdev_la12xx_params *params,
+   const char *input_args)
+{
+   struct rte_kvargs *kvlist = NULL;
+   int ret = 0;
+
+   if (params == NULL)
+   return -EINVAL;
+   if (input_args) {
+   kvlist = rte_kvargs_parse(input_args,
+   bbdev_la12xx_valid_params);
+   if (kvlist == NULL)
+   return -EFAULT;
+
+   ret = rte_kvargs_process(kvlist, bbdev_la12xx_valid_params[0],
+   &parse_u16_arg, ¶ms->queues_num);
+   if (ret < 0)
+   goto exit;
+
+   }
+
+exit:
+   if (kvlist)
+   rte_kvargs_free(kvlist);
+   return ret;
+}
+
 /* Create device */
 static int
-la12xx_bbdev_create(struct rte_vdev_device *vdev)
+la12xx_bbdev_create(struct rte_vdev_device *vdev,
+   struct bbdev_la12xx_params *init_params __rte_unused)
 {
struct rte_bbdev *bbdev;
const char *name = rte_vdev_device_name(vdev);
@@ -60,7 +120,11 @@ la12xx_bbdev_create(struct rte_vdev_device *vdev)
 static int
 la12xx_bbdev_probe(struct rte_vdev_device *vdev)
 {
+   struct bbdev_la12xx_params init_params = {
+   8
+   };
const char *name;
+   const char *input_args;
 
PMD_INIT_FUNC_TRACE();
 
@@ -71,7 +135,10 @@ la12xx_bbdev_probe(struct rte_vdev_device *vdev)
if (name == NULL)
return -EINVAL;
 
-   return la12xx_bbdev_create(vdev);
+   input_args = rte_vdev_device_args(vdev);
+   parse_bbdev_la12xx_params(&init_params, input_args);
+
+   return la12xx_bbdev_create(vdev, &init_params);
 }
 
 /* Uninitialise device */
@@ -105,4 +172,6 @@ static struct rte_vdev_driver bbdev_la12xx_pmd_drv = {
 };
 
 RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_la12xx_pmd_drv);
+RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME,
+   LA12XX_MAX_NB_QUEUES_ARG"=");
 RTE_LOG_REGISTER_DEFAULT(bbdev_la12xx_logtype, NOTICE);
-- 
2.17.1



[dpdk-dev] [PATCH v7 4/9] baseband/la12xx: add support for multiple modems

2021-09-28 Thread nipun . gupta
From: Hemant Agrawal 

This patch adds support for multiple modems by assigning
a modem id as dev args in vdev creation.

Signed-off-by: Hemant Agrawal 
---
 drivers/baseband/la12xx/bbdev_la12xx.c | 64 +++---
 drivers/baseband/la12xx/bbdev_la12xx.h | 56 +++
 drivers/baseband/la12xx/bbdev_la12xx_ipc.h | 20 +++
 3 files changed, 133 insertions(+), 7 deletions(-)
 create mode 100644 drivers/baseband/la12xx/bbdev_la12xx.h
 create mode 100644 drivers/baseband/la12xx/bbdev_la12xx_ipc.h

diff --git a/drivers/baseband/la12xx/bbdev_la12xx.c 
b/drivers/baseband/la12xx/bbdev_la12xx.c
index ee03e93870..f9f32d665a 100644
--- a/drivers/baseband/la12xx/bbdev_la12xx.c
+++ b/drivers/baseband/la12xx/bbdev_la12xx.c
@@ -14,24 +14,26 @@
 #include 
 
 #include 
+#include 
+#include 
 
 #define DRIVER_NAME baseband_la12xx
 
 /*  Initialisation params structure that can be used by LA12xx BBDEV driver */
 struct bbdev_la12xx_params {
uint8_t queues_num; /*< LA12xx BBDEV queues number */
+   int8_t modem_id; /*< LA12xx modem instance id */
 };
 
 #define LA12XX_MAX_NB_QUEUES_ARG   "max_nb_queues"
+#define LA12XX_VDEV_MODEM_ID_ARG   "modem"
+#define LA12XX_MAX_MODEM 4
 
 static const char * const bbdev_la12xx_valid_params[] = {
LA12XX_MAX_NB_QUEUES_ARG,
+   LA12XX_VDEV_MODEM_ID_ARG,
 };
 
-/* private data structure */
-struct bbdev_la12xx_private {
-   unsigned int max_nb_queues;  /**< Max number of queues */
-};
 static inline int
 parse_u16_arg(const char *key, const char *value, void *extra_args)
 {
@@ -51,6 +53,28 @@ parse_u16_arg(const char *key, const char *value, void 
*extra_args)
return 0;
 }
 
+/* Parse integer from integer argument */
+static int
+parse_integer_arg(const char *key __rte_unused,
+   const char *value, void *extra_args)
+{
+   int i;
+   char *end;
+
+   errno = 0;
+
+   i = strtol(value, &end, 10);
+   if (*end != 0 || errno != 0 || i < 0 || i > LA12XX_MAX_MODEM) {
+   rte_bbdev_log(ERR, "Supported Port IDS are 0 to %d",
+   LA12XX_MAX_MODEM - 1);
+   return -EINVAL;
+   }
+
+   *((uint32_t *)extra_args) = i;
+
+   return 0;
+}
+
 /* Parse parameters used to create device */
 static int
 parse_bbdev_la12xx_params(struct bbdev_la12xx_params *params,
@@ -72,6 +96,16 @@ parse_bbdev_la12xx_params(struct bbdev_la12xx_params *params,
if (ret < 0)
goto exit;
 
+   ret = rte_kvargs_process(kvlist,
+   bbdev_la12xx_valid_params[1],
+   &parse_integer_arg,
+   ¶ms->modem_id);
+
+   if (params->modem_id >= LA12XX_MAX_MODEM) {
+   rte_bbdev_log(ERR, "Invalid modem id, must be < %u",
+   LA12XX_MAX_MODEM);
+   goto exit;
+   }
}
 
 exit:
@@ -83,10 +117,11 @@ parse_bbdev_la12xx_params(struct bbdev_la12xx_params 
*params,
 /* Create device */
 static int
 la12xx_bbdev_create(struct rte_vdev_device *vdev,
-   struct bbdev_la12xx_params *init_params __rte_unused)
+   struct bbdev_la12xx_params *init_params)
 {
struct rte_bbdev *bbdev;
const char *name = rte_vdev_device_name(vdev);
+   struct bbdev_la12xx_private *priv;
 
PMD_INIT_FUNC_TRACE();
 
@@ -102,6 +137,20 @@ la12xx_bbdev_create(struct rte_vdev_device *vdev,
return -ENOMEM;
}
 
+   priv = bbdev->data->dev_private;
+   priv->modem_id = init_params->modem_id;
+   /* if modem id is not configured */
+   if (priv->modem_id == -1)
+   priv->modem_id = bbdev->data->dev_id;
+
+   /* Reset Global variables */
+   priv->num_ldpc_enc_queues = 0;
+   priv->num_ldpc_dec_queues = 0;
+   priv->num_valid_queues = 0;
+   priv->max_nb_queues = init_params->queues_num;
+
+   rte_bbdev_log(INFO, "Setting Up %s: DevId=%d, ModemId=%d",
+   name, bbdev->data->dev_id, priv->modem_id);
bbdev->dev_ops = NULL;
bbdev->device = &vdev->device;
bbdev->data->socket_id = 0;
@@ -121,7 +170,7 @@ static int
 la12xx_bbdev_probe(struct rte_vdev_device *vdev)
 {
struct bbdev_la12xx_params init_params = {
-   8
+   8, -1,
};
const char *name;
const char *input_args;
@@ -173,5 +222,6 @@ static struct rte_vdev_driver bbdev_la12xx_pmd_drv = {
 
 RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_la12xx_pmd_drv);
 RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME,
-   LA12XX_MAX_NB_QUEUES_ARG"=");
+   LA12XX_MAX_NB_QUEUES_ARG"="
+   LA12XX_VDEV_MODEM_ID_ARG "= ");
 RTE_LOG_REGISTER_DEFAULT(bbdev_la12xx_logtype, NOTICE);
diff --git a/drivers/baseband/la12xx/bbdev_la12xx.h 
b/drivers/baseband/la12xx/bbdev_la12xx.h
new file mode 100644
index 0

[dpdk-dev] [PATCH v7 7/9] app/bbdev: enable la12xx for bbdev

2021-09-28 Thread nipun . gupta
From: Hemant Agrawal 

This patch adds the la12xx driver in test bbdev

Signed-off-by: Hemant Agrawal 
---
 app/test-bbdev/meson.build | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/app/test-bbdev/meson.build b/app/test-bbdev/meson.build
index edb9deef84..a726a5b3fa 100644
--- a/app/test-bbdev/meson.build
+++ b/app/test-bbdev/meson.build
@@ -23,3 +23,6 @@ endif
 if dpdk_conf.has('RTE_BASEBAND_ACC100')
 deps += ['baseband_acc100']
 endif
+if dpdk_conf.has('RTE_LIBRTE_PMD_BBDEV_LA12XX')
+   deps += ['baseband_la12xx']
+endif
-- 
2.17.1



[dpdk-dev] [PATCH v7 8/9] app/bbdev: handle endianness of test data

2021-09-28 Thread nipun . gupta
From: Nipun Gupta 

With data input, output and harq also supported in big
endian format, this patch updates the testbbdev application
to handle the endianness conversion as directed by the
driver being used.

If the driver supports big endian data processing, conversion
from little endian to big is handled by the testbbdev application.

Signed-off-by: Nipun Gupta 
---
 app/test-bbdev/test_bbdev_perf.c | 62 
 1 file changed, 62 insertions(+)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 469597b8b3..a0f565ee3f 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -227,6 +227,64 @@ clear_soft_out_cap(uint32_t *op_flags)
*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
 }
 
+static inline void
+reverse_op(struct op_data_entries *op)
+{
+   uint8_t nb_segs = op->nb_segments;
+   uint32_t *data, len;
+   int complete, rem, i, j;
+   uint8_t *rem_data, temp;
+
+   /* Validate each mbuf segment length */
+   for (i = 0; i < nb_segs; ++i) {
+   len = op->segments[i].length;
+   data = op->segments[i].addr;
+
+   /* Swap complete u32 bytes */
+   complete = len / 4;
+   for (j = 0; j < complete; j++)
+   data[j] = rte_bswap32(data[j]);
+
+   /* Swap any remaining data for last seg */
+   if (i == (nb_segs - 1)) {
+   rem = len % 4;
+   rem_data = (uint8_t *)&data[j];
+   for (j = 0; j < rem/2; j++) {
+   temp = rem_data[j];
+   rem_data[j] = rem_data[rem - j - 1];
+   rem_data[rem - j - 1] = temp;
+   }
+   }
+   }
+}
+
+static inline void
+reverse_all_ops(void)
+{
+   unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
+   nb_harq_inputs, nb_harq_outputs;
+
+   nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
+   if (nb_inputs)
+   reverse_op(&test_vector.entries[DATA_INPUT]);
+
+   nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
+   if (nb_soft_outputs)
+   reverse_op(&test_vector.entries[DATA_SOFT_OUTPUT]);
+
+   nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
+   if (nb_hard_outputs)
+   reverse_op(&test_vector.entries[DATA_HARD_OUTPUT]);
+
+   nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
+   if (nb_harq_inputs)
+   reverse_op(&test_vector.entries[DATA_HARQ_INPUT]);
+
+   nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
+   if (nb_harq_outputs)
+   reverse_op(&test_vector.entries[DATA_HARQ_OUTPUT]);
+}
+
 static int
 check_dev_cap(const struct rte_bbdev_info *dev_info)
 {
@@ -234,6 +292,7 @@ check_dev_cap(const struct rte_bbdev_info *dev_info)
unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
nb_harq_inputs, nb_harq_outputs;
const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
+   uint8_t be_data = dev_info->drv.support_be_data;
 
nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
@@ -245,6 +304,9 @@ check_dev_cap(const struct rte_bbdev_info *dev_info)
if (op_cap->type != test_vector.op_type)
continue;
 
+   if (be_data)
+   reverse_all_ops();
+
if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
const struct rte_bbdev_op_cap_turbo_dec *cap =
&op_cap->cap.turbo_dec;
-- 
2.17.1



[dpdk-dev] [PATCH v7 5/9] baseband/la12xx: add queue and modem config support

2021-09-28 Thread nipun . gupta
From: Hemant Agrawal 

This patch adds support for connecting with the modem
and creating the IPC channel as queues with the modem
for the exchange of data.

Signed-off-by: Nipun Gupta 
Signed-off-by: Hemant Agrawal 
---
 MAINTAINERS|   1 +
 doc/guides/bbdevs/index.rst|   1 +
 doc/guides/bbdevs/la12xx.rst   |  81 +++
 doc/guides/rel_notes/release_21_11.rst |   5 +
 drivers/baseband/la12xx/bbdev_la12xx.c | 555 -
 drivers/baseband/la12xx/bbdev_la12xx.h |  17 +-
 drivers/baseband/la12xx/bbdev_la12xx_ipc.h | 189 ++-
 7 files changed, 836 insertions(+), 13 deletions(-)
 create mode 100644 doc/guides/bbdevs/la12xx.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index e3f0e8759f..1bf39820d6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1296,6 +1296,7 @@ NXP LA12xx driver
 M: Nipun Gupta 
 M: Hemant Agrawal 
 F: drivers/baseband/la12xx/
+F: doc/guides/bbdevs/la12xx.rst
 
 
 Rawdev Drivers
diff --git a/doc/guides/bbdevs/index.rst b/doc/guides/bbdevs/index.rst
index 4445cbd1b0..cedd706fa6 100644
--- a/doc/guides/bbdevs/index.rst
+++ b/doc/guides/bbdevs/index.rst
@@ -14,3 +14,4 @@ Baseband Device Drivers
 fpga_lte_fec
 fpga_5gnr_fec
 acc100
+la12xx
diff --git a/doc/guides/bbdevs/la12xx.rst b/doc/guides/bbdevs/la12xx.rst
new file mode 100644
index 00..3c9ac5c047
--- /dev/null
+++ b/doc/guides/bbdevs/la12xx.rst
@@ -0,0 +1,81 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+Copyright 2021 NXP
+
+NXP LA12xx Poll Mode Driver
+===
+
+The BBDEV LA12xx poll mode driver (PMD) supports an implementation for
+offloading High Phy processing functions like LDPC Encode / Decode 5GNR 
wireless
+acceleration function, using PCI based LA12xx Software defined radio.
+
+More information can be found at `NXP Official Website
+`_.
+
+Features
+
+
+LA12xx PMD supports the following features:
+
+- Maximum of 8 UL queues
+- Maximum of 8 DL queues
+- PCIe Gen-3 x8 Interface
+- MSI-X
+
+Installation
+
+
+Section 3 of the DPDK manual provides instructions on installing and compiling 
DPDK.
+
+DPDK requires hugepages to be configured as detailed in section 2 of the DPDK 
manual.
+
+Initialization
+--
+
+The device can be listed on the host console with:
+
+
+Use the following lspci command to get the multiple LA12xx processor ids. The
+device ID of the LA12xx baseband processor is "1c30".
+
+.. code-block:: console
+
+  sudo lspci -nn
+
+...
+0001:01:00.0 Power PC [0b20]: Freescale Semiconductor Inc Device [1957:1c30] (
+rev 10)
+...
+0002:01:00.0 Power PC [0b20]: Freescale Semiconductor Inc Device [1957:1c30] (
+rev 10)
+
+
+Prerequisites
+-
+
+Currently supported by DPDK:
+
+- NXP LA1224 BSP **1.0+**.
+- NXP LA1224 PCIe Modem card connected to ARM host.
+
+- Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup 
the basic DPDK environment.
+
+* Use dev arg option ``modem=0`` to identify the modem instance for a given
+  device. This is required only if more than 1 modem cards are attached to 
host.
+  this is optional and the default value is 0.
+  e.g. ``--vdev=baseband_la12xx,modem=0``
+
+* Use dev arg option ``max_nb_queues=x`` to specify the maximum number of 
queues
+  to be used for communication with offload device i.e. modem. default is 16.
+  e.g. ``--vdev=baseband_la12xx,max_nb_queues=4``
+
+Enabling logs
+-
+
+For enabling logs, use the following EAL parameter:
+
+.. code-block:: console
+
+   ./your_bbdev_application  --log-level=la12xx:
+
+Using ``bb.la12xx`` as log matching criteria, all Baseband PMD logs can be
+enabled which are lower than logging ``level``.
diff --git a/doc/guides/rel_notes/release_21_11.rst 
b/doc/guides/rel_notes/release_21_11.rst
index ad7c1afec0..60b92c9a9f 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -91,6 +91,11 @@ New Features
   Added command-line options to specify total number of processes and
   current process ID. Each process owns subset of Rx and Tx queues.
 
+* **Added NXP LA12xx baseband PMD.**
+
+  * Added a new baseband PMD driver for NXP LA12xx Software defined radio.
+  * See the :doc:`../bbdevs/la12xx` for more details.
+
 
 Removed Items
 -
diff --git a/drivers/baseband/la12xx/bbdev_la12xx.c 
b/drivers/baseband/la12xx/bbdev_la12xx.c
index f9f32d665a..46ee5b4d70 100644
--- a/drivers/baseband/la12xx/bbdev_la12xx.c
+++ b/drivers/baseband/la12xx/bbdev_la12xx.c
@@ -3,6 +3,11 @@
  */
 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #include 
 #include 
@@ -29,11 +34,552 @@ struct bbdev_la12xx_params {
 #define LA12XX_VDEV_MODEM_ID_ARG   "modem"
 #define LA12XX_MAX_MODEM 4
 
+#define LA12XX_MAX_CORES   4
+#define LA12XX_LDPC_ENC_C

[dpdk-dev] [PATCH v7 6/9] baseband/la12xx: add enqueue and dequeue support

2021-09-28 Thread nipun . gupta
From: Hemant Agrawal 

Add support for enqueue and dequeue the LDPC enc/dec
from the modem device.

Signed-off-by: Nipun Gupta 
Signed-off-by: Hemant Agrawal 
---
 doc/guides/bbdevs/features/la12xx.ini  |  13 +
 doc/guides/bbdevs/la12xx.rst   |  47 ++-
 drivers/baseband/la12xx/bbdev_la12xx.c | 328 -
 drivers/baseband/la12xx/bbdev_la12xx_ipc.h |  37 +++
 4 files changed, 420 insertions(+), 5 deletions(-)
 create mode 100644 doc/guides/bbdevs/features/la12xx.ini

diff --git a/doc/guides/bbdevs/features/la12xx.ini 
b/doc/guides/bbdevs/features/la12xx.ini
new file mode 100644
index 00..0aec5eecb6
--- /dev/null
+++ b/doc/guides/bbdevs/features/la12xx.ini
@@ -0,0 +1,13 @@
+;
+; Supported features of the 'la12xx' bbdev driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Turbo Decoder (4G) = N
+Turbo Encoder (4G) = N
+LDPC Decoder (5G)  = Y
+LDPC Encoder (5G)  = Y
+LLR/HARQ Compression   = N
+HW Accelerated = Y
+BBDEV API  = Y
diff --git a/doc/guides/bbdevs/la12xx.rst b/doc/guides/bbdevs/la12xx.rst
index 3c9ac5c047..b111ec0dd6 100644
--- a/doc/guides/bbdevs/la12xx.rst
+++ b/doc/guides/bbdevs/la12xx.rst
@@ -16,10 +16,11 @@ Features
 
 LA12xx PMD supports the following features:
 
+- LDPC Encode in the DL
+- LDPC Decode in the UL
 - Maximum of 8 UL queues
 - Maximum of 8 DL queues
 - PCIe Gen-3 x8 Interface
-- MSI-X
 
 Installation
 
@@ -79,3 +80,47 @@ For enabling logs, use the following EAL parameter:
 
 Using ``bb.la12xx`` as log matching criteria, all Baseband PMD logs can be
 enabled which are lower than logging ``level``.
+
+Test Application
+
+
+BBDEV provides a test application, ``test-bbdev.py`` and range of test data 
for testing
+the functionality of LA12xx for FEC encode and decode, depending on the device
+capabilities. The test application is located under app->test-bbdev folder and 
has the
+following options:
+
+.. code-block:: console
+
+  "-p", "--testapp-path": specifies path to the bbdev test app.
+  "-e", "--eal-params" : EAL arguments which are passed to the test app.
+  "-t", "--timeout": Timeout in seconds (default=300).
+  "-c", "--test-cases" : Defines test cases to run. Run all if not specified.
+  "-v", "--test-vector": Test vector path 
(default=dpdk_path+/app/test-bbdev/test_vectors/bbdev_null.data).
+  "-n", "--num-ops": Number of operations to process on device 
(default=32).
+  "-b", "--burst-size" : Operations enqueue/dequeue burst size (default=32).
+  "-s", "--snr": SNR in dB used when generating LLRs for bler 
tests.
+  "-s", "--iter_max"   : Number of iterations for LDPC decoder.
+  "-l", "--num-lcores" : Number of lcores to run (default=16).
+  "-i", "--init-device" : Initialise PF device with default values.
+
+
+To execute the test application tool using simple decode or encode data,
+type one of the following:
+
+.. code-block:: console
+
+  ./test-bbdev.py -e="--vdev=baseband_la12xx,socket_id=0,max_nb_queues=8" -c 
validation -n 64 -b 1 -v ./ldpc_dec_default.data
+  ./test-bbdev.py -e="--vdev=baseband_la12xx,socket_id=0,max_nb_queues=8" -c 
validation -n 64 -b 1 -v ./ldpc_enc_default.data
+
+The test application ``test-bbdev.py``, supports the ability to configure the 
PF device with
+a default set of values, if the "-i" or "- -init-device" option is included. 
The default values
+are defined in test_bbdev_perf.c.
+
+
+Test Vectors
+
+
+In addition to the simple LDPC decoder and LDPC encoder tests, bbdev also 
provides
+a range of additional tests under the test_vectors folder, which may be 
useful. The results
+of these tests will depend on the LA12xx FEC capabilities which may cause some
+testcases to be skipped, but no failure should be reported.
diff --git a/drivers/baseband/la12xx/bbdev_la12xx.c 
b/drivers/baseband/la12xx/bbdev_la12xx.c
index 46ee5b4d70..69ca83cee6 100644
--- a/drivers/baseband/la12xx/bbdev_la12xx.c
+++ b/drivers/baseband/la12xx/bbdev_la12xx.c
@@ -120,6 +120,10 @@ la12xx_queue_release(struct rte_bbdev *dev, uint16_t q_id)
((uint64_t) ((unsigned long) (A) \
- ((uint64_t)ipc_priv->hugepg_start.host_vaddr)))
 
+#define MODEM_P2V(A) \
+   ((uint64_t) ((unsigned long) (A) \
+   + (unsigned long)(ipc_priv->peb_start.host_vaddr)))
+
 static int ipc_queue_configure(uint32_t channel_id,
ipc_t instance, struct bbdev_la12xx_q_priv *q_priv)
 {
@@ -334,6 +338,318 @@ static const struct rte_bbdev_ops pmd_ops = {
.queue_release = la12xx_queue_release,
.start = la12xx_start
 };
+
+static inline int
+is_bd_ring_full(uint32_t ci, uint32_t ci_flag,
+   uint32_t pi, uint32_t pi_flag)
+{
+   if (pi == ci) {
+   if (pi_flag != ci_flag)
+   return 1; /* Ring is Full */
+   }
+   return 0;
+}
+
+static inline int
+prepare_ldpc_enc_op(struct rte_b

[dpdk-dev] [PATCH v7 9/9] app/bbdev: add test vectors for transport blocks

2021-09-28 Thread nipun . gupta
From: Nipun Gupta 

This patch adds two test vectors for transport block in network byte
order:
- LDPC encode for Transport Block
- LDPC decode for Transport block

Signed-off-by: Nipun Gupta 
---
 app/test-bbdev/test_vectors/ldpc_dec_tb.data | 265 +++
 app/test-bbdev/test_vectors/ldpc_enc_tb.data |  95 +++
 2 files changed, 360 insertions(+)
 create mode 100644 app/test-bbdev/test_vectors/ldpc_dec_tb.data
 create mode 100644 app/test-bbdev/test_vectors/ldpc_enc_tb.data

diff --git a/app/test-bbdev/test_vectors/ldpc_dec_tb.data 
b/app/test-bbdev/test_vectors/ldpc_dec_tb.data
new file mode 100644
index 00..5882e8aafe
--- /dev/null
+++ b/app/test-bbdev/test_vectors/ldpc_dec_tb.data
@@ -0,0 +1,265 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2021 NXP
+
+op_type =
+RTE_BBDEV_OP_LDPC_DEC
+
+input0 =
+0x7f818181, 0x7f7f817f, 0x7f817f81, 0x817f8181, 0x817f7f81, 0x817f7f81, 
0x7f817f7f, 0x7f7f7f81,
+0x7f7f7f7f, 0x817f7f7f, 0x7f7f8181, 0x817f7f7f, 0x7f7f817f, 0x8181817f, 
0x817f7f81, 0x7f7f8181,
+0x81817f81, 0x7f7f7f81, 0x81817f7f, 0x7f81817f, 0x817f7f81, 0x7f817f81, 
0x8181817f, 0x7f7f7f81,
+0x7f7f817f, 0x81817f7f, 0x7f81817f, 0x7f7f817f, 0x817f817f, 0x7f7f817f, 
0x7f7f7f81, 0x7f7f7f81,
+0x7f817f7f, 0x7f818181, 0x7f818181, 0x8181817f, 0x7f7f8181, 0x7f7f7f7f, 
0x7f817f7f, 0x81818181,
+0x7f7f817f, 0x7f7f7f7f, 0x817f8181, 0x7f7f7f81, 0x817f817f, 0x817f8181, 
0x81817f7f, 0x7f7f7f7f,
+0x81817f7f, 0x7f81817f, 0x817f7f7f, 0x817f7f81, 0x7f817f7f, 0x817f817f, 
0x81817f81, 0x817f7f7f,
+0x817f7f81, 0x817f817f, 0x8181817f, 0x81818181, 0x81818181, 0x817f7f7f, 
0x7f817f81, 0x817f7f7f,
+0x7f817f7f, 0x7f817f7f, 0x7f818181, 0x7f818181, 0x817f817f, 0x81817f81, 
0x7f81817f, 0x817f817f,
+0x7f81817f, 0x817f7f81, 0x7f7f817f, 0x817f8181, 0x817f7f81, 0x81817f7f, 
0x8181817f, 0x7f7f7f7f,
+0x817f7f81, 0x7f81817f, 0x7f7f7f7f, 0x7f817f81, 0x7f817f81, 0x817f7f7f, 
0x81818181, 0x7f7f8181,
+0x7f818181, 0x81817f7f, 0x7f817f81, 0x7f81817f, 0x7f7f8181, 0x7f7f817f, 
0x7f7f817f, 0x81817f81,
+0x7f818181, 0x8181817f, 0x7f817f81, 0x7f7f8181, 0x7f7f8181, 0x817f7f81, 
0x7f7f7f7f, 0x7f817f7f,
+0x7f7f8181, 0x7f817f7f, 0x7f818181, 0x81817f7f, 0x817f7f7f, 0x81817f81, 
0x7f817f7f, 0x7f81817f,
+0x7f81817f, 0x817f7f81, 0x7f817f7f, 0x817f817f, 0x7f7f817f, 0x817f7f81, 
0x817f817f, 0x817f8181,
+0x817f817f, 0x7f817f7f, 0x7f817f7f, 0x8181817f, 0x7f818181, 0x7f817f7f, 
0x7f818181, 0x7f7f817f,
+0x817f8181, 0x8181817f, 0x7f817f7f, 0x7f7f817f, 0x7f7f817f, 0x7f7f8181, 
0x817f7f7f, 0x817f8181,
+0x7f7f817f, 0x7f7f7f81, 0x817f7f81, 0x7f7f7f81, 0x7f7f7f7f, 0x817f8181, 
0x81818181, 0x81817f81,
+0x817f7f81, 0x7f7f817f, 0x7f817f7f, 0x7f7f8181, 0x7f7f7f81, 0x7f817f81, 
0x817f8181, 0x81817f7f,
+0x7f7f817f, 0x7f817f81, 0x7f817f81, 0x7f7f7f81, 0x81818181, 0x81817f7f, 
0x7f7f817f, 0x7f817f81,
+0x7f7f8181, 0x7f81817f, 0x817f8181, 0x7f7f8181, 0x7f7f7f81, 0x8181817f, 
0x7f817f81, 0x81817f7f,
+0x817f7f81, 0x817f8181, 0x817f7f7f, 0x7f7f817f, 0x817f7f7f, 0x81817f81, 
0x7f7f7f7f, 0x817f7f7f,
+0x817f7f81, 0x7f817f81, 0x8181817f, 0x81817f7f, 0x817f7f81, 0x7f818181, 
0x7f7f817f, 0x7f818181,
+0x7f7f7f7f, 0x7f7f8181, 0x7f7f817f, 0x7f817f81, 0x817f7f7f, 0x817f817f, 
0x7f7f7f81, 0x7f7f7f81,
+0x7f7f817f, 0x817f8181, 0x81817f81, 0x817f7f7f, 0x7f7f7f81, 0x817f7f7f, 
0x7f7f7f7f, 0x7f7f817f,
+0x81817f81, 0x7f7f7f81, 0x81817f7f, 0x81818181, 0x817f7f81, 0x817f817f, 
0x817f7f7f, 0x7f7f7f7f,
+0x7f81817f, 0x8181817f, 0x7f7f817f, 0x817f7f81, 0x7f81817f, 0x817f7f81, 
0x7f7f817f, 0x7f818181,
+0x817f7f7f, 0x817f7f81, 0x81817f81, 0x81817f81, 0x8181817f, 0x7f817f7f, 
0x7f7f7f81, 0x8181817f,
+0x7f817f81, 0x8181817f, 0x7f7f7f81, 0x817f8181, 0x817f7f81, 0x81817f81, 
0x7f7f817f, 0x7f7f817f,
+0x817f7f7f, 0x817f8181, 0x7f817f7f, 0x817f7f81, 0x7f7f7f81, 0x7f7f7f7f, 
0x817f8181, 0x7f817f81,
+0x81817f81, 0x7f7f7f81, 0x817f7f7f, 0x817f817f, 0x81817f7f, 0x817f7f81, 
0x7f81817f, 0x817f817f,
+0x81817f81, 0x8181817f, 0x7f818181, 0x7f81817f, 0x8181817f, 0x817f7f7f, 
0x7f817f7f, 0x8181817f,
+0x7f7f7f7f, 0x81817f7f, 0x7f7f7f81, 0x817f7f81, 0x7f7f7f81, 0x7f817f7f, 
0x7f7f7f7f, 0x817f7f81,
+0x7f818181, 0x817f7f7f, 0x7f7f7f81, 0x817f7f7f, 0x81818181, 0x81817f7f, 
0x7f817f81, 0x7f7f7f81,
+0x7f818181, 0x817f8181, 0x81817f81, 0x8181817f, 0x7f7f8181, 0x817f7f81, 
0x7f81817f, 0x7f7f817f,
+0x7f7f8181, 0x7f817f7f, 0x8181817f, 0x7f817f81, 0x7f817f7f, 0x7f7f8181, 
0x7f818181, 0x7f7f8181,
+0x817f7f81, 0x81817f81, 0x7f81817f, 0x81817f81, 0x817f7f7f, 0x7f818181, 
0x8181817f, 0x817f8181,
+0x7f7f7f81, 0x7f81817f, 0x81817f7f, 0x7f817f81, 0x7f7f817f, 0x7f7f8181, 
0x7f81817f, 0x7f81817f,
+0x7f818181, 0x817f7f7f, 0x817f8181, 0x7f7f8181, 0x8181817f, 0x7f817f81, 
0x817f8181, 0x817f817f,
+0x7f7f817f, 0x81817f81, 0x7f817f7f, 0x7f81817f, 0x817f817f, 0x81817f81, 
0x7f7f7f7f, 0x8181817f,
+0x7f817f81, 0x7f817f7f, 0x7f817f81, 0x7f817f7f, 0x7f7f7f81, 0x817f817f, 
0x7f81817f, 0x817f7f81,
+0x81818181, 0x7f817f81, 0x7f7f7f81, 0x7f81817f, 0x817f7f7f, 0x817f7f81, 
0x817f7f7f, 0x81817f81,
+0x7f7f817f, 0x817f8181, 0x81818181, 

Re: [dpdk-dev] [PATCH v3 0/3] Add user specified IV with lookaside IPsec

2021-09-28 Thread Akhil Goyal
> Add support for using user provided IV with lookaside protocol (IPsec). Using
> this option, application can provide IV to be used per operation. This
> option can be used for knownn vector tests (which is otherwise impossible
> due to random nature of IV) as well as if application wishes to use its
> own random generator source.
> 
> Depends on
> http://patches.dpdk.org/project/dpdk/list/?series=18642
Applied to dpdk-next-crypto

Thanks.


[dpdk-dev] [PATCH v2] net/virtio: fix virtio-user init when using existing tap

2021-09-28 Thread David Marchand
When attaching to an existing mono queue tap, the virtio-user was not
reporting that the virtio device was not properly initialised which
prevented from starting the port later.

$ ip tuntap add test mode tap
$ dpdk-testpmd --vdev \
  net_virtio_user0,iface=test,path=/dev/vhost-net,queues=2 -- -i

...
virtio_user_dev_init_mac(): (/dev/vhost-net) No valid MAC in devargs or
device, use random
vhost_kernel_open_tap(): TUNSETIFF failed: Invalid argument
vhost_kernel_enable_queue_pair(): fail to open tap for vhost kernel
virtio_user_start_device(): (/dev/vhost-net) Failed to start device
...
Configuring Port 0 (socket 0)
vhost_kernel_open_tap(): TUNSETIFF failed: Invalid argument
vhost_kernel_enable_queue_pair(): fail to open tap for vhost kernel
virtio_set_multiple_queues(): Multiqueue configured but send command
failed, this is too late now...
Fail to start port 0: Invalid argument
Please stop the ports first
Done

The virtio-user with vhost-kernel backend was going through a lot
of complications to initialise tap fds only when using them.

For each qp enabled for the first time, a tapfd was created via
TUNSETIFF with unneeded additional steps (see below) and then mapped to
the right qp in the vhost-net backend.
Unneeded steps (as long as it has been done once for the port):
- tap features were queried while this is a constant on a running
  system,
- the device name in DPDK was updated,
- the mac address of the tap was set,

On subsequent qps state change, the vhost-net backend fd mapping was
updated and the associated queue/tapfd were disabled/enabled via
TUNSETQUEUE.

Now, this patch simplifies the whole logic by keeping all tapfds opened
and in enabled state (from the tap point of view) at all time.

Unused ioctl defines are removed.

Tap features are validated earlier to fail initialisation asap.
Tap name discovery and mac address configuration are moved when
configuring qp 0.

To support attaching to mono queue tap, the virtio-user driver now tries
to attach in multi queue first, then fallbacks to mono queue.

Finally (but this is more for consistency), VIRTIO_NET_F_MQ feature is
exposed only if the underlying tap supports multi queue.

Signed-off-by: David Marchand 
---
Changes since v1:
- refactored tap_open() following Olivier comment and updated log
  messages level accordingly,
- added more error logs,

---

 drivers/net/virtio/virtio_user/vhost_kernel.c |  92 +
 .../net/virtio/virtio_user/vhost_kernel_tap.c | 180 +-
 .../net/virtio/virtio_user/vhost_kernel_tap.h |  16 +-
 3 files changed, 153 insertions(+), 135 deletions(-)

diff --git a/drivers/net/virtio/virtio_user/vhost_kernel.c 
b/drivers/net/virtio/virtio_user/vhost_kernel.c
index d65f89e1fc..202a8cdee1 100644
--- a/drivers/net/virtio/virtio_user/vhost_kernel.c
+++ b/drivers/net/virtio/virtio_user/vhost_kernel.c
@@ -120,9 +120,9 @@ vhost_kernel_set_owner(struct virtio_user_dev *dev)
 static int
 vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features)
 {
-   int ret;
-   unsigned int tap_features;
struct vhost_kernel_data *data = dev->backend_data;
+   unsigned int tap_flags;
+   int ret;
 
ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_GET_FEATURES, 
features);
if (ret < 0) {
@@ -130,7 +130,7 @@ vhost_kernel_get_features(struct virtio_user_dev *dev, 
uint64_t *features)
return -1;
}
 
-   ret = tap_support_features(&tap_features);
+   ret = tap_get_flags(data->tapfds[0], &tap_flags);
if (ret < 0) {
PMD_DRV_LOG(ERR, "Failed to get TAP features");
return -1;
@@ -140,7 +140,7 @@ vhost_kernel_get_features(struct virtio_user_dev *dev, 
uint64_t *features)
 * but not claimed by vhost-net, so we add them back when
 * reporting to upper layer.
 */
-   if (tap_features & IFF_VNET_HDR) {
+   if (tap_flags & IFF_VNET_HDR) {
*features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
*features |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
}
@@ -148,7 +148,7 @@ vhost_kernel_get_features(struct virtio_user_dev *dev, 
uint64_t *features)
/* vhost_kernel will not declare this feature, but it does
 * support multi-queue.
 */
-   if (tap_features & IFF_MULTI_QUEUE)
+   if (tap_flags & IFF_MULTI_QUEUE)
*features |= (1ull << VIRTIO_NET_F_MQ);
 
return 0;
@@ -380,9 +380,20 @@ vhost_kernel_set_status(struct virtio_user_dev *dev 
__rte_unused, uint8_t status
 static int
 vhost_kernel_setup(struct virtio_user_dev *dev)
 {
-   int vhostfd;
-   uint32_t q, i;
struct vhost_kernel_data *data;
+   unsigned int tap_features;
+   unsigned int tap_flags;
+   const char *ifname;
+   uint32_t q, i;
+   int vhostfd;
+
+   if (tap_support_features(&tap_features) < 0)
+   return -1;
+
+   if ((tap_features & IFF_VNET_HDR) == 0) {
+   PMD_INIT_LOG(ERR, "

Re: [dpdk-dev] [PATCH] net/i40e: fix Rx packet statistics

2021-09-28 Thread Kevin Traynor

On 28/09/2021 03:12, Zhang, AlvinX wrote:

-Original Message-
From: Kevin Traynor 
Sent: Tuesday, September 28, 2021 12:00 AM
To: Zhang, AlvinX ; Xing, Beilei
; Guo, Junfeng 
Cc: dev@dpdk.org; sta...@dpdk.org; Zhang, Qi Z ;
Yigit, Ferruh 
Subject: Re: [dpdk-dev] [PATCH] net/i40e: fix Rx packet statistics

On 26/09/2021 08:57, Alvin Zhang wrote:

Some packets are discarded by the NIC because they are larger than the
MTU, these packets should be counted as "RX error" instead of "RX
packet".

The register 'GL_RXERR1' can count above discarded packets.
This patch adds reading and calculation of the 'GL_RXERR1' counter
when reporting DPDK statistics.

Fixes: f4a91c38b4ad ("i40e: add extended stats")
Cc: sta...@dpdk.org

Signed-off-by: Alvin Zhang 
---
   drivers/net/i40e/i40e_ethdev.c | 16 +---
   drivers/net/i40e/i40e_ethdev.h | 10 ++
   2 files changed, 23 insertions(+), 3 deletions(-)



It's a bit hard to understand the code for someone not familiar with the i40e
stats. I think it needs careful review from i40e maintainers. A few questions
below,

Did you test this with testpmd? Can you show an example of a test where these
packets are now correctly accounted for?


The issue as below:

sendp(Ether()/IP()/Raw('x' * 1500), iface="enp24s0f1")
-- Forward statistics for port 0 --
RX-packets: 1 RX-dropped: 0 RX-total: 1
TX-packets: 0 TX-dropped: 0 TX-total: 0


Although we didn't really got the packet, but the statistic indicates a packet 
has been received successfully.
We can add above example to commit log in V2.



Hi Alvin. Thanks for answering my questions and showing how it was 
tested. I don't have any more comments. I won't ack just because I am 
not familiar enough with the i40e stats in general to give a meaningful ack.


Kevin.



I see there is also an RXERR2 register that catches other errors, does it need 
to
be considered as well?


We are not sure whether the packets counted by RXERR2 also will be counted by 
"Rx-packets".
So in this patch we only consider RXERR1 and fix the issue mentioned above.




diff --git a/drivers/net/i40e/i40e_ethdev.c
b/drivers/net/i40e/i40e_ethdev.c index 7a2a828..30a2cdf 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -532,7 +532,7 @@ static int i40e_sw_tunnel_filter_insert(struct i40e_pf

*pf,

   /* store statistics names and its offset in stats structure */
   struct rte_i40e_xstats_name_off {
char name[RTE_ETH_XSTATS_NAME_SIZE];
-   unsigned offset;
+   int offset;


It is unusual to see you changing an offset to an int. You are expecting 
negative
offsets?


   };

   static const struct rte_i40e_xstats_name_off
rte_i40e_stats_strings[] = { @@ -542,6 +542,8 @@ struct

rte_i40e_xstats_name_off {

{"rx_dropped_packets", offsetof(struct i40e_eth_stats, rx_discards)},
{"rx_unknown_protocol_packets", offsetof(struct i40e_eth_stats,
rx_unknown_protocol)},
+   {"rx_err1", offsetof(struct i40e_pf, rx_err1) -
+   offsetof(struct i40e_pf, stats)},


Here offsetof(struct i40e_pf, rx_err1) - offsetof(struct i40e_pf, stats) may be 
a negative value.



rx_err1 is correct by datasheet but meaningless to a user. Suggest to find a 
more
descriptive name, or document what it is, or tell the user to reference the
datasheet.


I will update it in v2.




{"tx_unicast_packets", offsetof(struct i40e_eth_stats, tx_unicast)},
{"tx_multicast_packets", offsetof(struct i40e_eth_stats,

tx_multicast)},

{"tx_broadcast_packets", offsetof(struct i40e_eth_stats,
tx_broadcast)}, @@ -3238,6 +3240,10 @@ void

i40e_flex_payload_reg_set_default(struct i40e_hw *hw)

pf->offset_loaded,
&os->eth.rx_unknown_protocol,
&ns->eth.rx_unknown_protocol);
+   i40e_stat_update_48(hw, I40E_GL_RXERR1_H(hw->pf_id +

I40E_MAX_VF),

+   I40E_GL_RXERR1_L(hw->pf_id + I40E_MAX_VF),
+   pf->offset_loaded, &pf->rx_err1_offset,
+   &pf->rx_err1);
i40e_stat_update_48_in_64(hw, I40E_GLPRT_GOTCH(hw->port),
  I40E_GLPRT_GOTCL(hw->port),
  pf->offset_loaded, &os->eth.tx_bytes, @@ 
-3437,7

+3443,8 @@

void i40e_flex_payload_reg_set_default(struct i40e_hw *hw)
stats->ipackets = pf->main_vsi->eth_stats.rx_unicast +
pf->main_vsi->eth_stats.rx_multicast +
pf->main_vsi->eth_stats.rx_broadcast -
-   pf->main_vsi->eth_stats.rx_discards;
+   pf->main_vsi->eth_stats.rx_discards -
+   pf->rx_err1;
stats->opackets = ns->eth.tx_unicast +
ns->eth.tx_multicast +
ns->eth.tx_b

[dpdk-dev] [PATCH] net/i40e: fix remove MAC/VLAN addresses error

2021-09-28 Thread Robin Zhang
Firmware will return I40E_AQ_RC_ENOENT when trying to delete non-existent
MAC/VLAN addresses from the HW filtering, this should not be considered as
an Admin Queue error. But in i40e_asq_send_command, it will return
I40E_ERR_ADMIN_QUEUE_ERROR if the return value of Admin Queue command
processed by Firmware is not I40E_AQ_RC_OK or I40E_AQ_RC_EBUSY.

Use i40e_aq_remove_macvlan_v2 instead so that we can get the corresponding
Admin Queue status, and not report as an error in DPDK when Firmware
return I40E_AQ_RC_ENOENT.

Fixes: 4861cde46116 ("i40e: new poll mode driver")

Signed-off-by: Robin Zhang 
---
 drivers/net/i40e/i40e_ethdev.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index acbe7380b1..fdc9943034 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -7036,6 +7036,7 @@ i40e_remove_macvlan_filters(struct i40e_vsi *vsi,
int ret = I40E_SUCCESS;
struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
struct i40e_aqc_remove_macvlan_element_data *req_list;
+   enum i40e_admin_queue_err aq_status;
 
if (filter == NULL  || total == 0)
return I40E_ERR_PARAM;
@@ -7083,11 +7084,17 @@ i40e_remove_macvlan_filters(struct i40e_vsi *vsi,
req_list[i].flags = rte_cpu_to_le_16(flags);
}
 
-   ret = i40e_aq_remove_macvlan(hw, vsi->seid, req_list,
-   actual_num, NULL);
+   ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, req_list,
+   actual_num, NULL, &aq_status);
+
if (ret != I40E_SUCCESS) {
-   PMD_DRV_LOG(ERR, "Failed to remove macvlan filter");
-   goto DONE;
+   /* Do not report as an error when firmware returns 
ENOENT */
+   if (aq_status == I40E_AQ_RC_ENOENT) {
+   ret = I40E_SUCCESS;
+   } else {
+   PMD_DRV_LOG(ERR, "Failed to remove macvlan 
filter");
+   goto DONE;
+   }
}
num += actual_num;
} while (num < total);
-- 
2.25.1



Re: [dpdk-dev] [PATCH v5 2/2] vhost: enable IOMMU for async vhost

2021-09-28 Thread Ding, Xuan
Hi Anatoly,

> -Original Message-
> From: Burakov, Anatoly 
> Sent: Monday, September 27, 2021 8:14 PM
> To: Ding, Xuan ; dev@dpdk.org;
> maxime.coque...@redhat.com; Xia, Chenbo 
> Cc: Hu, Jiayu ; Jiang, Cheng1 ;
> Richardson, Bruce ; Pai G, Sunil
> ; Wang, Yinan ; Yang, YvonneX
> 
> Subject: Re: [PATCH v5 2/2] vhost: enable IOMMU for async vhost
> 
> On 27-Sep-21 8:48 AM, Xuan Ding wrote:
> > The use of IOMMU has many advantages, such as isolation and address
> translation. This patch extends the capability of the DMA engine to use
> > IOMMU if the DMA engine is bound to vfio.
> >
> > When set memory table, the guest memory will be mapped
> > into the default container of DPDK.
> >
> > Signed-off-by: Xuan Ding 
> > ---
> 
> 
> > /* Flush IOTLB cache as previous HVAs are now invalid */
> > @@ -1329,6 +1424,17 @@ vhost_user_set_mem_table(struct virtio_net
> **pdev, struct VhostUserMsg *msg,
> > goto free_guest_pages;
> > }
> >
> > +   if (dev->async_copy) {
> > +   dev->async_map_status = rte_zmalloc_socket("async-dma-
> map-status",
> > +   sizeof(bool) * memory->nregions, 0,
> numa_node);
> 
> Would it be useful to sanity check this to make sure we're not leaking
> memory?

Thanks for the catch, will add the check in next version.

Regards,
Xuan

> 
> --
> Thanks,
> Anatoly


Re: [dpdk-dev] [PATCH v5 5/5] devargs: parse global device syntax

2021-09-28 Thread Thomas Monjalon
28/09/2021 10:29, David Marchand:
> On Tue, Apr 13, 2021 at 5:15 AM Xueming Li  wrote:
> >
> > When parsing a devargs, try to parse using the global device syntax
> > first. Fallback on legacy syntax on error.
> >
> > Example of new global device syntax:
> >  -a bus=pci,addr=82:00.0/class=eth/driver=mlx5,dv_flow_en=1
> >
> > Signed-off-by: Xueming Li 
> > Reviewed-by: Gaetan Rivet 
> 
> Starting with a virtio user port, I get a warning:
> 
> # dpdk-testpmd --vdev
> net_virtio_user0,iface=test,path=/dev/vhost-net,queues=1
> --log-level=pmd.net.virtio.*:debug -- -i
> ...
> EAL: Unrecognized layer dev/vhost-net,queues=1
> ...
> 
> Am I doing something wrong?
> If not, could you have a look?

The new global syntax is using the slash / as separator.
We should detect legit use of slash in a path.
Here, the value starts with a slash so it should be easy to ignore.
Another way is to consider slash only if followed by "class=" or "driver="




Re: [dpdk-dev] [PATCH v5 2/2] examples/vhost: use API to check inflight packets

2021-09-28 Thread Kevin Traynor

On 28/09/2021 07:24, Xuan Ding wrote:

In async data path, call rte_vhost_async_get_inflight_thread_unsafe()
API to directly return the number of inflight packets instead of
maintaining a local variable.

Signed-off-by: Xuan Ding 
---
  examples/vhost/main.c | 25 +++--
  examples/vhost/main.h |  1 -
  2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d0bf1f31e3..3faac6d053 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -842,11 +842,8 @@ complete_async_pkts(struct vhost_dev *vdev)
  
  	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,

VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count) {
+   if (complete_count)
free_pkts(p_cpl, complete_count);
-   __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, 
__ATOMIC_SEQ_CST);
-   }
-
  }
  
  static __rte_always_inline void

@@ -886,7 +883,6 @@ drain_vhost(struct vhost_dev *vdev)
  
  		complete_async_pkts(vdev);

ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m, 
nr_xmit);
-   __atomic_add_fetch(&vdev->pkts_inflight, ret, __ATOMIC_SEQ_CST);
  
  		enqueue_fail = nr_xmit - ret;

if (enqueue_fail)
@@ -1212,7 +1208,6 @@ drain_eth_rx(struct vhost_dev *vdev)
complete_async_pkts(vdev);
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count);
-   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count, 
__ATOMIC_SEQ_CST);
  
  		enqueue_fail = rx_count - enqueue_count;

if (enqueue_fail)
@@ -1338,6 +1333,7 @@ destroy_device(int vid)
struct vhost_dev *vdev = NULL;
int lcore;
uint16_t i;



+   int pkts_inflight;


You can move this down to the block it is used in

  
  	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {

if (vdev->vid == vid)
@@ -1384,13 +1380,13 @@ destroy_device(int vid)
  
  	if (async_vhost_driver) {

uint16_t n_pkt = 0;
-   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+   pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid, 
VIRTIO_RXQ);
+   struct rte_mbuf *m_cpl[pkts_inflight];
  
-		while (vdev->pkts_inflight) {

+   while (pkts_inflight) {
n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, 
VIRTIO_RXQ,
-   m_cpl, vdev->pkts_inflight);
+   m_cpl, pkts_inflight);
free_pkts(m_cpl, n_pkt);
-   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, 
__ATOMIC_SEQ_CST);


This is an infinite loop if there are pkts_inflight, need to recheck 
pkts_inflight in the loop.



}
  
  		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);

@@ -1486,6 +1482,7 @@ static int
  vring_state_changed(int vid, uint16_t queue_id, int enable)
  {
struct vhost_dev *vdev = NULL;
+   int pkts_inflight;
  
  	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {

if (vdev->vid == vid)
@@ -1500,13 +1497,13 @@ vring_state_changed(int vid, uint16_t queue_id, int 
enable)
if (async_vhost_driver) {
if (!enable) {
uint16_t n_pkt = 0;
-   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+   pkts_inflight = 
rte_vhost_async_get_inflight_thread_unsafe(vid, queue_id);
+   struct rte_mbuf *m_cpl[pkts_inflight];
  
-			while (vdev->pkts_inflight) {

+   while (pkts_inflight) {
n_pkt = 
rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
-   m_cpl, 
vdev->pkts_inflight);
+   m_cpl, pkts_inflight);
free_pkts(m_cpl, n_pkt);
-   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, 
__ATOMIC_SEQ_CST);


Same comments as destroy_device


}
}
}
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index e7b1ac60a6..0ccdce4b4a 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,7 +51,6 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
-   uint16_t pkts_inflight;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;





Re: [dpdk-dev] [PATCH 6/6] raw/cnxk_bphy: use named value for queue count

2021-09-28 Thread Jerin Jacob
On Mon, Jul 26, 2021 at 7:29 PM Tomasz Duszynski  wrote:
>
> From: Jakub Palider 
>
> Queue counter is used in a few places so it was given some
> reasonable name.
>
> Signed-off-by: Jakub Palider 

Series Acked-by: Jerin Jacob 
Series applied to dpdk-next-net-mrvl/for-next-net. Thanks.


> ---
>  drivers/raw/cnxk_bphy/cnxk_bphy.c | 2 ++
>  drivers/raw/cnxk_bphy/cnxk_bphy_irq.h | 3 ++-
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/raw/cnxk_bphy/cnxk_bphy.c 
> b/drivers/raw/cnxk_bphy/cnxk_bphy.c
> index affc5c3e83..030beff05e 100644
> --- a/drivers/raw/cnxk_bphy/cnxk_bphy.c
> +++ b/drivers/raw/cnxk_bphy/cnxk_bphy.c
> @@ -51,6 +51,8 @@ bphy_rawdev_selftest(uint16_t dev_id)
> queues = rte_rawdev_queue_count(dev_id);
> if (queues == 0)
> return -ENODEV;
> +   if (queues != BPHY_QUEUE_CNT)
> +   return -EINVAL;
>
> ret = rte_rawdev_start(dev_id);
> if (ret)
> diff --git a/drivers/raw/cnxk_bphy/cnxk_bphy_irq.h 
> b/drivers/raw/cnxk_bphy/cnxk_bphy_irq.h
> index b55147b93e..f141677e2a 100644
> --- a/drivers/raw/cnxk_bphy/cnxk_bphy_irq.h
> +++ b/drivers/raw/cnxk_bphy/cnxk_bphy_irq.h
> @@ -23,10 +23,11 @@ struct bphy_irq_queue {
>  };
>
>  struct bphy_device {
> +#define BPHY_QUEUE_CNT 1
> struct roc_bphy_irq_chip *irq_chip;
> struct bphy_mem mem;
> /* bphy irq interface supports single queue only */
> -   struct bphy_irq_queue queues[1];
> +   struct bphy_irq_queue queues[BPHY_QUEUE_CNT];
>  };
>
>  int cnxk_bphy_intr_init(uint16_t dev_id);
> --
> 2.25.1
>


Re: [dpdk-dev] [PATCH] net/ice: add ability to reduce the Rx latency

2021-09-28 Thread Kevin Traynor

On 22/09/2021 03:16, Zhang, AlvinX wrote:

-Original Message-
From: Kevin Traynor 
Sent: Tuesday, September 21, 2021 5:21 PM
To: Zhang, AlvinX ; Zhang, Qi Z
; Guo, Junfeng 
Cc: dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH] net/ice: add ability to reduce the Rx latency

On 18/09/2021 02:33, Zhang, AlvinX wrote:

-Original Message-
From: Kevin Traynor 
Sent: Saturday, September 18, 2021 1:25 AM
To: Zhang, AlvinX ; Zhang, Qi Z
; Guo, Junfeng 
Cc: dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH] net/ice: add ability to reduce the Rx
latency

On 14/09/2021 02:31, Alvin Zhang wrote:

This patch adds a devarg parameter to enable/disable reducing the Rx
latency.

Signed-off-by: Alvin Zhang 
---
   doc/guides/nics/ice.rst  |  8 
   drivers/net/ice/ice_ethdev.c | 26 +++---
drivers/net/ice/ice_ethdev.h |  1 +
   3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/doc/guides/nics/ice.rst b/doc/guides/nics/ice.rst index
5bc472f..3db0430 100644
--- a/doc/guides/nics/ice.rst
+++ b/doc/guides/nics/ice.rst
@@ -219,6 +219,14 @@ Runtime Config Options

 These ICE_DBG_XXX are defined in

``drivers/net/ice/base/ice_type.h``.


+- ``Reduce Rx interrupts and latency`` (default ``0``)
+
+  vRAN workloads require low latency DPDK interface for the front
+ haul  interface connection to Radio. Now we can reduce Rx
+ interrupts and  latency by specify ``1`` for parameter ``rx-low-latency``::
+
+-a :88:00.0,rx-low-latency=1
+


When would a user select this and when not? What is the trade off?

The text is a bit unclear. It looks below like it reduces the
interrupt latency, but not the number of interrupts. Maybe I got it wrong.


Yes, it reduces the interrupt latency, We will refine the doc in next
patch.



Thanks, the text in v2 is clearer.





   Driver compilation and testing
   --

diff --git a/drivers/net/ice/ice_ethdev.c
b/drivers/net/ice/ice_ethdev.c index a4cd39c..85662e4 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -29,12 +29,14 @@
   #define ICE_PIPELINE_MODE_SUPPORT_ARG

"pipeline-mode-support"

   #define ICE_PROTO_XTR_ARG "proto_xtr"
   #define ICE_HW_DEBUG_MASK_ARG "hw_debug_mask"
+#define ICE_RX_LOW_LATENCY"rx-low-latency"

   static const char * const ice_valid_args[] = {
ICE_SAFE_MODE_SUPPORT_ARG,
ICE_PIPELINE_MODE_SUPPORT_ARG,
ICE_PROTO_XTR_ARG,
ICE_HW_DEBUG_MASK_ARG,
+   ICE_RX_LOW_LATENCY,
NULL
   };

@@ -1827,6 +1829,9 @@ static int ice_parse_devargs(struct
rte_eth_dev

*dev)

if (ret)
goto bail;

+   ret = rte_kvargs_process(kvlist, ICE_RX_LOW_LATENCY,
+&parse_bool, &ad->devargs.rx_low_latency);
+
   bail:
rte_kvargs_free(kvlist);
return ret;
@@ -3144,8 +3149,9 @@ static int ice_init_rss(struct ice_pf *pf)  {
struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
uint32_t val, val_tx;
-   int i;
+   int rx_low_latency, i;

+   rx_low_latency = vsi->adapter->devargs.rx_low_latency;
for (i = 0; i < nb_queue; i++) {
/*do actual bind*/
val = (msix_vect & QINT_RQCTL_MSIX_INDX_M) | @@ -3155,8

+3161,21 @@

static int ice_init_rss(struct ice_pf *pf)

PMD_DRV_LOG(INFO, "queue %d is binding to vect %d",
base_queue + i, msix_vect);
+
/* set ITR0 value */
-   ICE_WRITE_REG(hw, GLINT_ITR(0, msix_vect), 0x2);
+   if (rx_low_latency) {
+   /**
+* Empirical configuration for optimal real time
+* latency reduced interrupt throttling to 2us
+*/
+   ICE_WRITE_REG(hw, GLINT_ITR(0, msix_vect), 0x1);


Why not set this to 0? "Setting the INTERVAL to zero enables
immediate interrupt."



Didn't see a reply to this comment?

I'm not requesting a change, just asking if there is a reason you didn't choose 
the
lowest latency setting, and if you should?


Setting the INTERVAL to zero enables immediate interrupts, which will cause more
interrupts at high packet rates,
and more interrupts will consume more PCI bandwidth and CPU cycles.
Setting to 2us is a performance trade-off.


ok, thanks.




+   ICE_WRITE_REG(hw, QRX_ITR(base_queue + i),
+ QRX_ITR_NO_EXPR_M);
+   } else {
+   ICE_WRITE_REG(hw, GLINT_ITR(0, msix_vect), 0x2);
+   ICE_WRITE_REG(hw, QRX_ITR(base_queue + i), 0);
+   }
+
ICE_WRITE_REG(hw, QINT_RQCTL(base_queue + i), val);
ICE_WRITE_REG(hw, QINT_TQCTL(base_queue + i), val_tx);
}
@@ -5314,7 +5333,8 @@ static int ice_xstats_get_names(__rte_unused

struct rte_eth_dev *dev,

  ICE_HW_DEBUG_MASK_ARG "=0xXXX"
  

Re: [dpdk-dev] [dpdk-stable] [PATCH v4] mbuf: fix reset on mbuf free

2021-09-28 Thread Ananyev, Konstantin


> 
> Hi,
> 
> I've re-read the entire thread.
> If I understand correctly, the root problem was (in initial patch):
> 
> >   m1 = rte_pktmbuf_alloc(mp);
> >   rte_pktmbuf_append(m1, 500);
> >   m2 = rte_pktmbuf_alloc(mp);
> >   rte_pktmbuf_append(m2, 500);
> >   rte_pktmbuf_chain(m1, m2);
> >   m0 = rte_pktmbuf_alloc(mp);
> >   rte_pktmbuf_append(m0, 500);
> >   rte_pktmbuf_chain(m0, m1);
> >
> > As rte_pktmbuf_chain() does not reset nb_seg in the initial m1 segment
> > (this is not required), after this code the mbuf chain have 3
> > segments:
> >   - m0: next=m1, nb_seg=3
> >   - m1: next=m2, nb_seg=2
> >   - m2: next=NULL, nb_seg=1
> >
> The proposed fix was to ALWAYS set next and nb_seg fields on mbuf_free(),
> regardless next field content. That would perform unconditional write
> to mbuf, 

I don't think that is a correct understanding; see below.

Current code:
if (m->next != NULL) {
   m->next = NULL;
  m->nb_segs = 1;
}

Proposed code:
if (m->next != NULL)
 m->next = NULL;
if (m->nb_segs != 1)
m->nb_segs = 1;

So what this patch adds: one more load and compare.
Note that load is from the first mbuf cache line, which
already has to be in the L1 cache by that time.

As I remember the reported slowdown is really tiny.
My vote would be to go ahead with this patch.

> and might affect the configurations, where are no multi-segment
> packets at al. mbuf_free() is "backbone" API, it is used by all cases, all
> scenaries are affected.
> 
> As far as I know, the current approach for nb_seg field - it contains other
> value than 1 only in the first mbuf , for the following segments,  it should
> not be considered at all (only the first segment fields are valid), and it is
> supposed to contain 1, as it was initially allocated from the pool.
> 
> In the example above the problem was introduced by
> rte_pktmbuf_chain(). Could we consider fixing the rte_pktmbuf_chain()
> (used in potentially fewer common sceneries)  instead of touching
> the extremely common rte_mbuf_free() ?
> 
> With best regards,
> Slava
> 
> > -Original Message-
> > From: Thomas Monjalon 
> > Sent: Tuesday, September 28, 2021 11:29
> > To: Olivier Matz ; Ali Alnubani
> > ; Slava Ovsiienko 
> > Cc: Morten Brørup ; dev@dpdk.org; David
> > Marchand ; Alexander Kozyrev
> > ; Ferruh Yigit ;
> > zhaoyan.c...@intel.com; Andrew Rybchenko
> > ; Ananyev, Konstantin
> > ; Ajit Khaparde
> > ; jer...@marvell.com
> > Subject: Re: [dpdk-dev] [dpdk-stable] [PATCH v4] mbuf: fix reset on mbuf
> > free
> >
> > Follow-up again:
> > We have added a note in 21.08, we should fix it in 21.11.
> > If there are no counter proposal, I suggest applying this patch, no matter 
> > the
> > performance regression.
> >
> >
> > 30/07/2021 16:54, Thomas Monjalon:
> > > 30/07/2021 16:35, Morten Brørup:
> > > > > From: Olivier Matz [mailto:olivier.m...@6wind.com]
> > > > > Sent: Friday, 30 July 2021 14.37
> > > > >
> > > > > Hi Thomas,
> > > > >
> > > > > On Sat, Jul 24, 2021 at 10:47:34AM +0200, Thomas Monjalon wrote:
> > > > > > What's the follow-up for this patch?
> > > > >
> > > > > Unfortunatly, I still don't have the time to work on this topic yet.
> > > > >
> > > > > In my initial tests, in our lab, I didn't notice any performance
> > > > > regression, but Ali has seen an impact (0.5M PPS, but I don't know
> > > > > how much in percent).
> > > > >
> > > > >
> > > > > > 19/01/2021 15:04, Slava Ovsiienko:
> > > > > > > Hi, All
> > > > > > >
> > > > > > > Could we postpone this patch at least to rc2? We would like to
> > > > > conduct more investigations?
> > > > > > >
> > > > > > > With best regards, Slava
> > > > > > >
> > > > > > > From: Olivier Matz 
> > > > > > > > On Mon, Jan 18, 2021 at 05:52:32PM +, Ali Alnubani wrote:
> > > > > > > > > Hi,
> > > > > > > > > (Sorry had to resend this to some recipients due to mail
> > > > > > > > > server
> > > > > problems).
> > > > > > > > >
> > > > > > > > > Just confirming that I can still reproduce the regression
> > > > > > > > > with
> > > > > single core and
> > > > > > > > 64B frames on other servers.
> > > > > > > >
> > > > > > > > Many thanks for the feedback. Can you please detail what is
> > > > > > > > the
> > > > > amount of
> > > > > > > > performance loss in percent, and confirm the test case? (I
> > > > > suppose it is
> > > > > > > > testpmd io forward).
> > > > > > > >
> > > > > > > > Unfortunatly, I won't be able to spend a lot of time on this
> > > > > > > > soon
> > > > > (sorry for
> > > > > > > > that). So I see at least these 2 options:
> > > > > > > >
> > > > > > > > - postpone the patch again, until I can find more time to 
> > > > > > > > analyze
> > > > > > > >   and optimize
> > > > > > > > - apply the patch if the performance loss is acceptable
> > > > > > > > compared
> > > > > to
> > > > > > > >   the added value of fixing a bug
> > > > > > > >
> > > > > > [...]
> > > > >
> > > > > Statu quo...
> > > > >
> > > > > Olivier
> > > > >
> > > >
> > > > The decision should be 

[dpdk-dev] [PATCH] crypto/octeontx2: fix lookaside IPsec capabilities

2021-09-28 Thread Tejasree Kondoj
Adding cbc, sha1-hmac and sha256-hmac to lookaside IPsec capabilities.

Fixes: 8f685ec2d545 ("crypto/octeontx2: support AES-CBC SHA1-HMAC")
Fixes: 61baeec4682c ("crypto/octeontx2: support AES-CBC SHA256-128-HMAC")

Signed-off-by: Tejasree Kondoj 
---
 .../octeontx2/otx2_cryptodev_capabilities.c   | 64 +++
 1 file changed, 64 insertions(+)

diff --git a/drivers/crypto/octeontx2/otx2_cryptodev_capabilities.c 
b/drivers/crypto/octeontx2/otx2_cryptodev_capabilities.c
index 80f3729995..ba3fbbbe22 100644
--- a/drivers/crypto/octeontx2/otx2_cryptodev_capabilities.c
+++ b/drivers/crypto/octeontx2/otx2_cryptodev_capabilities.c
@@ -767,6 +767,69 @@ static const struct rte_cryptodev_capabilities 
sec_caps_aes[] = {
}, }
}, }
},
+   {   /* AES CBC */
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+   {.sym = {
+   .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+   {.cipher = {
+   .algo = RTE_CRYPTO_CIPHER_AES_CBC,
+   .block_size = 16,
+   .key_size = {
+   .min = 16,
+   .max = 32,
+   .increment = 8
+   },
+   .iv_size = {
+   .min = 16,
+   .max = 16,
+   .increment = 0
+   }
+   }, }
+   }, }
+   },
+};
+
+static const struct rte_cryptodev_capabilities sec_caps_sha1_sha2[] = {
+   {   /* SHA1 HMAC */
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+   {.sym = {
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+   {.auth = {
+   .algo = RTE_CRYPTO_AUTH_SHA1_HMAC,
+   .block_size = 64,
+   .key_size = {
+   .min = 1,
+   .max = 1024,
+   .increment = 1
+   },
+   .digest_size = {
+   .min = 12,
+   .max = 20,
+   .increment = 8
+   },
+   }, }
+   }, }
+   },
+   {   /* SHA256 HMAC */
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+   {.sym = {
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+   {.auth = {
+   .algo = RTE_CRYPTO_AUTH_SHA256_HMAC,
+   .block_size = 64,
+   .key_size = {
+   .min = 1,
+   .max = 1024,
+   .increment = 1
+   },
+   .digest_size = {
+   .min = 16,
+   .max = 32,
+   .increment = 16
+   },
+   }, }
+   }, }
+   },
 };
 
 static const struct rte_security_capability
@@ -849,6 +912,7 @@ void
 otx2_crypto_sec_capabilities_init(union cpt_eng_caps *hw_caps)
 {
SEC_CAPS_ADD(hw_caps, aes);
+   SEC_CAPS_ADD(hw_caps, sha1_sha2);
 
sec_caps_add(caps_end, RTE_DIM(caps_end));
 }
-- 
2.27.0



Re: [dpdk-dev] [PATCH v5 1/2] eventdev: add rx queue conf get api

2021-09-28 Thread Jayatheerthan, Jay
> -Original Message-
> From: Kundapura, Ganapati 
> Sent: Thursday, September 16, 2021 6:21 PM
> To: Jayatheerthan, Jay ; jerinjac...@gmail.com
> Cc: dev@dpdk.org; Yigit, Ferruh 
> Subject: [PATCH v5 1/2] eventdev: add rx queue conf get api
> 
> Added rte_event_eth_rx_adapter_queue_conf_get() API to get rx queue
> information - event queue identifier, flags for handling received packets,
> scheduler type, event priority, polling frequency of the receive queue
> and flow identifier in rte_event_eth_rx_adapter_queue_conf structure
> 
> Signed-off-by: Ganapati Kundapura 
> 
> ---
> v5:
> * Filled queue_conf after memzone lookup
> * PMD callback if not NULL, invoked to override queue_conf struct
> * Added memzone lookup for stats_get(), stats_reset(), service_id_get()
>   api's called by secondary applications.
> 
> v4:
> * squashed 1/3 and 3/3
> * reused rte_event_eth_rx_adapter_queue_conf structure in place of
>   rte_event_eth_rx_adapter_queue_info
> * renamed to rte_event_eth_rx_adapter_queue_info_get() to
>   rte_event_eth_rx_adapter_queue_conf_get to align with
>   rte_event_eth_rx_adapter_queue_conf structure
> 
> v3:
> * Split single patch into implementation, test and documentation update
>   patches separately
> 
> v2:
> * Fixed build issue due to missing entry in version.map
> 
> v1:
> * Initial patch with implementation, test and doc together
> ---
> ---
>  .../prog_guide/event_ethernet_rx_adapter.rst   |  8 ++
>  lib/eventdev/eventdev_pmd.h| 28 +++
>  lib/eventdev/rte_event_eth_rx_adapter.c| 91 
> +-
>  lib/eventdev/rte_event_eth_rx_adapter.h| 27 +++
>  lib/eventdev/version.map   |  1 +
>  5 files changed, 154 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/prog_guide/event_ethernet_rx_adapter.rst 
> b/doc/guides/prog_guide/event_ethernet_rx_adapter.rst
> index 0780b6f..ce23d8a 100644
> --- a/doc/guides/prog_guide/event_ethernet_rx_adapter.rst
> +++ b/doc/guides/prog_guide/event_ethernet_rx_adapter.rst
> @@ -146,6 +146,14 @@ if the callback is supported, and the counts maintained 
> by the service function,
>  if one exists. The service function also maintains a count of cycles for 
> which
>  it was not able to enqueue to the event device.
> 
> +Getting Adapter queue config
> +
> +
> +The  ``rte_event_eth_rx_adapter_queue_conf_get()`` function reports
> +flags for handling received packets, event queue identifier, scheduler type,
> +event priority, polling frequency of the receive queue and flow identifier
> +in struct ``rte_event_eth_rx_adapter_queue_conf``.
> +
>  Interrupt Based Rx Queues
>  ~~
> 
> diff --git a/lib/eventdev/eventdev_pmd.h b/lib/eventdev/eventdev_pmd.h
> index 63b3bc4..e69644b 100644
> --- a/lib/eventdev/eventdev_pmd.h
> +++ b/lib/eventdev/eventdev_pmd.h
> @@ -562,6 +562,32 @@ typedef int (*eventdev_eth_rx_adapter_queue_del_t)
>   int32_t rx_queue_id);
> 
>  /**
> + * Retrieve Rx adapter queue config information for the specified
> + * rx queue ID.
> + *
> + * @param dev
> + *  Event device pointer
> + *
> + * @param eth_dev
> + *  Ethernet device pointer
> + *
> + * @param rx_queue_id
> + *  Ethernet device receive queue index.
> + *
> + * @param[out] queue_conf
> + *  Pointer to rte_event_eth_rx_adapter_queue_conf structure
> + *
> + * @return
> + *  - 0: Success
> + *  - <0: Error code on failure.
> + */
> +typedef int (*eventdev_eth_rx_adapter_queue_conf_get_t)
> + (const struct rte_eventdev *dev,
> + const struct rte_eth_dev *eth_dev,
> + uint16_t rx_queue_id,
> + struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
> +
> +/**
>   * Start ethernet Rx adapter. This callback is invoked if
>   * the caps returned from eventdev_eth_rx_adapter_caps_get(.., eth_port_id)
>   * has RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT set and Rx queues
> @@ -1081,6 +1107,8 @@ struct rte_eventdev_ops {
>   /**< Add Rx queues to ethernet Rx adapter */
>   eventdev_eth_rx_adapter_queue_del_t eth_rx_adapter_queue_del;
>   /**< Delete Rx queues from ethernet Rx adapter */
> + eventdev_eth_rx_adapter_queue_conf_get_t eth_rx_adapter_queue_conf_get;
> + /**< Get Rx adapter queue info */
>   eventdev_eth_rx_adapter_start_t eth_rx_adapter_start;
>   /**< Start ethernet Rx adapter */
>   eventdev_eth_rx_adapter_stop_t eth_rx_adapter_stop;
> diff --git a/lib/eventdev/rte_event_eth_rx_adapter.c 
> b/lib/eventdev/rte_event_eth_rx_adapter.c
> index f2dc695..6cc4210 100644
> --- a/lib/eventdev/rte_event_eth_rx_adapter.c
> +++ b/lib/eventdev/rte_event_eth_rx_adapter.c
> @@ -40,6 +40,8 @@
>  /* Sentinel value to detect initialized file handle */
>  #define INIT_FD  -1
> 
> +#define RXA_ADAPTER_ARRAY "rte_event_eth_rx_adapter_array"
> +
>  /*
>   * Used to store port and queue ID of

Re: [dpdk-dev] [PATCH v5 2/2] test/event: Add rx queue conf get test in rx adapter autotest

2021-09-28 Thread Jayatheerthan, Jay
> -Original Message-
> From: Kundapura, Ganapati 
> Sent: Thursday, September 16, 2021 6:21 PM
> To: Jayatheerthan, Jay ; jerinjac...@gmail.com
> Cc: dev@dpdk.org; Yigit, Ferruh 
> Subject: [PATCH v5 2/2] test/event: Add rx queue conf get test in rx adapter 
> autotest
> 
> Add unit tests for rte_event_eth_rx_adapter_queue_conf_get()
> in rx adapter autotest
> 
> Signed-off-by: Ganapati Kundapura 
> ---
>  app/test/test_event_eth_rx_adapter.c | 22 ++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/app/test/test_event_eth_rx_adapter.c 
> b/app/test/test_event_eth_rx_adapter.c
> index add4d8a..13664a3 100644
> --- a/app/test/test_event_eth_rx_adapter.c
> +++ b/app/test/test_event_eth_rx_adapter.c
> @@ -747,6 +747,27 @@ adapter_stats(void)
>   return TEST_SUCCESS;
>  }
> 
> +static int
> +adapter_queue_conf(void)
> +{
> + int err;
> + struct rte_event_eth_rx_adapter_queue_conf queue_conf;
> +
> + err = rte_event_eth_rx_adapter_queue_conf_get(TEST_INST_ID, TEST_DEV_ID,
> +   0, &queue_conf);
> + TEST_ASSERT(err == 0, "Expected 0 got %d", err);
> +
> + err = rte_event_eth_rx_adapter_queue_conf_get(TEST_INST_ID, TEST_DEV_ID,
> +   -1, &queue_conf);
> + TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
> +
> + err = rte_event_eth_rx_adapter_queue_conf_get(TEST_INST_ID, TEST_DEV_ID,
> +   0, NULL);
> + TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
> +
> + return TEST_SUCCESS;
> +}
> +
>  static struct unit_test_suite event_eth_rx_tests = {
>   .suite_name = "rx event eth adapter test suite",
>   .setup = testsuite_setup,
> @@ -759,6 +780,7 @@ static struct unit_test_suite event_eth_rx_tests = {
>   adapter_multi_eth_add_del),
>   TEST_CASE_ST(adapter_create, adapter_free, adapter_start_stop),
>   TEST_CASE_ST(adapter_create, adapter_free, adapter_stats),
> + TEST_CASE_ST(adapter_create, adapter_free, adapter_queue_conf),
>   TEST_CASES_END() /**< NULL terminate unit test array */
>   }
>  };
> --
> 2.6.4

Acked-by: Jay Jayatheerthan 



Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx queue

2021-09-28 Thread Jerin Jacob
On Sun, Sep 26, 2021 at 11:06 AM Xueming(Steven) Li  wrote:
>
> On Wed, 2021-08-11 at 13:04 +0100, Ferruh Yigit wrote:
> > On 8/11/2021 9:28 AM, Xueming(Steven) Li wrote:
> > >
> > >
> > > > -Original Message-
> > > > From: Jerin Jacob 
> > > > Sent: Wednesday, August 11, 2021 4:03 PM
> > > > To: Xueming(Steven) Li 
> > > > Cc: dpdk-dev ; Ferruh Yigit ; 
> > > > NBU-Contact-Thomas Monjalon ;
> > > > Andrew Rybchenko 
> > > > Subject: Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx queue
> > > >
> > > > On Mon, Aug 9, 2021 at 7:46 PM Xueming(Steven) Li  
> > > > wrote:
> > > > >
> > > > > Hi,
> > > > >
> > > > > > -Original Message-
> > > > > > From: Jerin Jacob 
> > > > > > Sent: Monday, August 9, 2021 9:51 PM
> > > > > > To: Xueming(Steven) Li 
> > > > > > Cc: dpdk-dev ; Ferruh Yigit ;
> > > > > > NBU-Contact-Thomas Monjalon ; Andrew Rybchenko
> > > > > > 
> > > > > > Subject: Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx queue
> > > > > >
> > > > > > On Mon, Aug 9, 2021 at 5:18 PM Xueming Li  
> > > > > > wrote:
> > > > > > >
> > > > > > > In current DPDK framework, each RX queue is pre-loaded with mbufs
> > > > > > > for incoming packets. When number of representors scale out in a
> > > > > > > switch domain, the memory consumption became significant. Most
> > > > > > > important, polling all ports leads to high cache miss, high
> > > > > > > latency and low throughput.
> > > > > > >
> > > > > > > This patch introduces shared RX queue. Ports with same
> > > > > > > configuration in a switch domain could share RX queue set by 
> > > > > > > specifying sharing group.
> > > > > > > Polling any queue using same shared RX queue receives packets from
> > > > > > > all member ports. Source port is identified by mbuf->port.
> > > > > > >
> > > > > > > Port queue number in a shared group should be identical. Queue
> > > > > > > index is
> > > > > > > 1:1 mapped in shared group.
> > > > > > >
> > > > > > > Share RX queue is supposed to be polled on same thread.
> > > > > > >
> > > > > > > Multiple groups is supported by group ID.
> > > > > >
> > > > > > Is this offload specific to the representor? If so can this name be 
> > > > > > changed specifically to representor?
> > > > >
> > > > > Yes, PF and representor in switch domain could take advantage.
> > > > >
> > > > > > If it is for a generic case, how the flow ordering will be 
> > > > > > maintained?
> > > > >
> > > > > Not quite sure that I understood your question. The control path of is
> > > > > almost same as before, PF and representor port still needed, rte 
> > > > > flows not impacted.
> > > > > Queues still needed for each member port, descriptors(mbuf) will be
> > > > > supplied from shared Rx queue in my PMD implementation.
> > > >
> > > > My question was if create a generic RTE_ETH_RX_OFFLOAD_SHARED_RXQ 
> > > > offload, multiple ethdev receive queues land into the same
> > > > receive queue, In that case, how the flow order is maintained for 
> > > > respective receive queues.
> > >
> > > I guess the question is testpmd forward stream? The forwarding logic has 
> > > to be changed slightly in case of shared rxq.
> > > basically for each packet in rx_burst result, lookup source stream 
> > > according to mbuf->port, forwarding to target fs.
> > > Packets from same source port could be grouped as a small burst to 
> > > process, this will accelerate the performance if traffic comes from
> > > limited ports. I'll introduce some common api to do shared rxq forwarding,
> > > call it with a packet-handling callback, so it suits
> > > all forwarding engines. Will send patches soon.
> > >
> >
> > All ports will put the packets in to the same queue (share queue), right? 
> > Does
> > this means only single core will poll only, what will happen if there are
> > multiple cores polling, won't it cause problem?
> >
> > And if this requires specific changes in the application, I am not sure 
> > about
> > the solution, can't this work in a transparent way to the application?
>
> Discussed with Jerin, new API introduced in v3 2/8 that aggregate ports
> in same group into one new port. Users could schedule polling on the
> aggregated port instead of all member ports.

The v3 still has testpmd changes in the fastpath. Right? IMO, for this
feature, we should not change the fastpath of the testpmd
application. Instead, testpmd can probably use aggregated ports as a
separate fwd_engine to show how to use this feature.

>
> >
> > Overall, is this for optimizing memory for the port represontors? If so 
> > can't we
> > have a port representor specific solution, reducing scope can reduce the
> > complexity it brings?
> >
> > > > If this offload is only useful for representor case, Can we make this 
> > > > offload specific to representor the case by changing its name and
> > > > scope.
> > >
> > > It works for both PF and representors in same switch domain, for 
> > > application like OVS, few changes to apply.
> > >
> > > >
> > > >
> > > > >
> > > >

Re: [dpdk-dev] [RFC 0/7] hide eth dev related structures

2021-09-28 Thread Ananyev, Konstantin

> On Wed, Sep 22, 2021 at 8:38 PM Ananyev, Konstantin
>  wrote:
> >
> >
> > > > Hi Jerin,
> > > >
> > > > > > NOTE: This is just an RFC to start further discussion and collect 
> > > > > > the feedback.
> > > > > > Due to significant amount of work, changes required are applied 
> > > > > > only to two
> > > > > > PMDs so far: net/i40e and net/ice.
> > > > > > So to build it you'll need to add:
> > > > > > -Denable_drivers='common/*,mempool/*,net/ice,net/i40e'
> > > > > > to your config options.
> > > > >
> > > > > >
> > > > > > That approach was selected to avoid(/minimize) possible performance 
> > > > > > losses.
> > > > > >
> > > > > > So far I done only limited amount functional and performance 
> > > > > > testing.
> > > > > > Didn't spot any functional problems, and performance numbers
> > > > > > remains the same before and after the patch on my box (testpmd, 
> > > > > > macswap fwd).
> > > > >
> > > > >
> > > > > Based on testing on octeonxt2. We see some regression in testpmd and
> > > > > bit on l3fwd too.
> > > > >
> > > > > Without patch: 73.5mpps/core in testpmd iofwd
> > > > > With out patch: 72 5mpps/core in testpmd iofwd
> > > > >
> > > > > Based on my understanding it is due to additional indirection.
> > > >
> > > > From your patch below, it looks like not actually additional 
> > > > indirection,
> > > > but extra memory dereference - func and dev pointers are now stored
> > > > at different places.
> > >
> > > Yup. I meant the same. We are on the same page.
> > >
> > > > Plus the fact that now we dereference rte_eth_devices[]
> > > > data inside PMD function. Which probably prevents compiler and CPU to 
> > > > load
> > > >  rte_eth_devices[port_id].data and rte_eth_devices[port_id]. 
> > > > pre_tx_burst_cbs[queue_id]
> > > > in advance before calling actual RX/TX function.
> > >
> > > Yes.
> > >
> > > > About your approach: I don’t mind to add extra opaque 'void *data' 
> > > > pointer,
> > > > but would prefer not to expose callback invocations code into inline 
> > > > function.
> > > > Main reason for that - I think it still need to be reworked to allow 
> > > > adding/removing
> > > > callbacks without stopping the device. Something similar to what was 
> > > > done for cryptodev
> > > > callbacks. To be able to do that in future without another ABI breakage 
> > > > callbacks related part
> > > > needs to be kept internal.
> > > > Though what we probably can do: add two dynamic arrays of opaque 
> > > > pointers to  rte_eth_burst_api.
> > > > One for rx/tx queue data pointers, second for rx/tx callback pointers.
> > > > To be more specific, something like:
> > > >
> > > > typedef uint16_t (*rte_eth_rx_burst_t)( void *rxq, struct rte_mbuf 
> > > > **rx_pkts, uint16_t nb_pkts, void *cbs);
> > > > typedef uint16_t (*rte_eth_tx_burst_t)(void *txq, struct rte_mbuf 
> > > > **tx_pkts, uint16_t nb_pkts, void *cbs);
> > > > 
> > > >
> > > > struct rte_eth_burst_api {
> > > > rte_eth_rx_burst_t rx_pkt_burst;
> > > > /**< PMD receive function. */
> > > > rte_eth_tx_burst_t tx_pkt_burst;
> > > > /**< PMD transmit function. */
> > > > rte_eth_tx_prep_t tx_pkt_prepare;
> > > > /**< PMD transmit prepare function. */
> > > > rte_eth_rx_queue_count_t rx_queue_count;
> > > > /**< Get the number of used RX descriptors. */
> > > > rte_eth_rx_descriptor_status_t rx_descriptor_status;
> > > > /**< Check the status of a Rx descriptor. */
> > > > rte_eth_tx_descriptor_status_t tx_descriptor_status;
> > > > /**< Check the status of a Tx descriptor. */
> > > > struct {
> > > >  void **queue_data;   /* point to 
> > > > rte_eth_devices[port_id].data-> rx_queues */
> > > >  void **cbs;  /*  points to 
> > > > rte_eth_devices[port_id].post_rx_burst_cbs */
> > > >} rx_data, tx_data;
> > > > } __rte_cache_aligned;
> > > >
> > > > static inline uint16_t
> > > > rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
> > > >  struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
> > > > {
> > > >struct rte_eth_burst_api *p;
> > > >
> > > > if (port_id >= RTE_MAX_ETHPORTS || queue_id >= 
> > > > RTE_MAX_QUEUES_PER_PORT)
> > > > return 0;
> > > >
> > > >   p =  &rte_eth_burst_api[port_id];
> > > >   return p->rx_pkt_burst(p->rx_data.queue_data[queue_id], rx_pkts, 
> > > > nb_pkts, p->rx_data.cbs[queue_id]);
> > >
> > >
> > >
> > > That works.
> > >
> > >
> > > > }
> > > >
> > > > Same for TX.
> > > >
> > > > If that looks ok to everyone, I'll try to prepare next version based on 
> > > > that.
> > >
> > >
> > > Looks good to me.
> > >
> > > > In theory that should avoid extra dereference problem and even reduce 
> > > > indirection.
> > > > As a drawback data->rxq/txq should always be allocated for 
> > > > RTE_MAX_QUEUES_PER_PORT entries,
> > > > but I presume that’s not a big deal.
> > > >
> > > >

Re: [dpdk-dev] [dpdk-stable] [PATCH v4] mbuf: fix reset on mbuf free

2021-09-28 Thread Morten Brørup
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Slava Ovsiienko
> Sent: Tuesday, 28 September 2021 11.01
> 
> Hi,
> 
> I've re-read the entire thread.
> If I understand correctly, the root problem was (in initial patch):
> 
> >   m1 = rte_pktmbuf_alloc(mp);
> >   rte_pktmbuf_append(m1, 500);
> >   m2 = rte_pktmbuf_alloc(mp);
> >   rte_pktmbuf_append(m2, 500);
> >   rte_pktmbuf_chain(m1, m2);
> >   m0 = rte_pktmbuf_alloc(mp);
> >   rte_pktmbuf_append(m0, 500);
> >   rte_pktmbuf_chain(m0, m1);
> >
> > As rte_pktmbuf_chain() does not reset nb_seg in the initial m1
> segment
> > (this is not required), after this code the mbuf chain have 3
> > segments:
> >   - m0: next=m1, nb_seg=3
> >   - m1: next=m2, nb_seg=2
> >   - m2: next=NULL, nb_seg=1
> >
> The proposed fix was to ALWAYS set next and nb_seg fields on
> mbuf_free(),
> regardless next field content. That would perform unconditional write
> to mbuf, and might affect the configurations, where are no multi-
> segment
> packets at al. mbuf_free() is "backbone" API, it is used by all cases,
> all
> scenaries are affected.
> 
> As far as I know, the current approach for nb_seg field - it contains
> other
> value than 1 only in the first mbuf , for the following segments,  it
> should
> not be considered at all (only the first segment fields are valid), and
> it is
> supposed to contain 1, as it was initially allocated from the pool.
> 
> In the example above the problem was introduced by
> rte_pktmbuf_chain(). Could we consider fixing the rte_pktmbuf_chain()
> (used in potentially less common scenarios) instead of touching
> the extremely common rte_mbuf_free() ?
> 
> With best regards,
> Slava

Great idea, Slava!

Changing the invariant for 'nb_segs', so it must be 1, except in the first 
segment of a segmented packet.

Thinking further about it, perhaps we can achieve even higher performance by a 
minor additional modification: Use 0 instead of 1? Or offset 'nb_segs' by -1, 
so it reflects the number of additional segments?

And perhaps combining the invariants for 'nb_segs' and 'next' could provide 
even more performance improvements. I don't know, just sharing a thought.

Anyway, I vote for fixing the bug. One way or the other!

-Morten

> 
> > -Original Message-
> > From: Thomas Monjalon 
> > Sent: Tuesday, September 28, 2021 11:29
> >
> > Follow-up again:
> > We have added a note in 21.08, we should fix it in 21.11.
> > If there are no counter proposal, I suggest applying this patch, no
> matter the
> > performance regression.
> >
> >
> > 30/07/2021 16:54, Thomas Monjalon:
> > > 30/07/2021 16:35, Morten Brørup:
> > > > > From: Olivier Matz [mailto:olivier.m...@6wind.com]
> > > > > Sent: Friday, 30 July 2021 14.37
> > > > >
> > > > > Hi Thomas,
> > > > >
> > > > > On Sat, Jul 24, 2021 at 10:47:34AM +0200, Thomas Monjalon
> wrote:
> > > > > > What's the follow-up for this patch?
> > > > >
> > > > > Unfortunately, I still don't have the time to work on this topic
> yet.
> > > > >
> > > > > In my initial tests, in our lab, I didn't notice any
> performance
> > > > > regression, but Ali has seen an impact (0.5M PPS, but I don't
> know
> > > > > how much in percent).
> > > > >
> > > > >
> > > > > > 19/01/2021 15:04, Slava Ovsiienko:
> > > > > > > Hi, All
> > > > > > >
> > > > > > > Could we postpose this patch at least to rc2? We would like
> to
> > > > > conduct more investigations?
> > > > > > >
> > > > > > > With best regards, Slava
> > > > > > >
> > > > > > > From: Olivier Matz 
> > > > > > > > On Mon, Jan 18, 2021 at 05:52:32PM +, Ali Alnubani
> wrote:
> > > > > > > > > Hi,
> > > > > > > > > (Sorry had to resend this to some recipients due to
> mail
> > > > > > > > > server
> > > > > problems).
> > > > > > > > >
> > > > > > > > > Just confirming that I can still reproduce the
> regression
> > > > > > > > > with
> > > > > single core and
> > > > > > > > 64B frames on other servers.
> > > > > > > >
> > > > > > > > Many thanks for the feedback. Can you please detail what
> is
> > > > > > > > the
> > > > > amount of
> > > > > > > > performance loss in percent, and confirm the test case?
> (I
> > > > > suppose it is
> > > > > > > > testpmd io forward).
> > > > > > > >
> > > > > > > > Unfortunately, I won't be able to spend a lot of time on
> this
> > > > > > > > soon
> > > > > (sorry for
> > > > > > > > that). So I see at least these 2 options:
> > > > > > > >
> > > > > > > > - postpone the patch again, until I can find more time to
> analyze
> > > > > > > >   and optimize
> > > > > > > > - apply the patch if the performance loss is acceptable
> > > > > > > > compared
> > > > > to
> > > > > > > >   the added value of fixing a bug
> > > > > > > >
> > > > > > [...]
> > > > >
> > > > > Statu quo...
> > > > >
> > > > > Olivier
> > > > >
> > > >
> > > > The decision should be simple:
> > > >
> > > > Does the DPDK project support segmented packets?
> > > > If yes, then apply the patch to fix the bug!
> > > >
> > > > If anyone seriously cares abou

Re: [dpdk-dev] [RFC v2 3/5] ethdev: copy ethdev 'burst' API into separate structure

2021-09-28 Thread Ananyev, Konstantin

> > >
> > > Copy public function pointers (rx_pkt_burst(), etc.) and related
> > > pointers to internal data from rte_eth_dev structure into a separate flat
> > > array. We can keep it public to still use inline functions for 'fast' 
> > > calls
> > > (like rte_eth_rx_burst(), etc.) to avoid/minimize slowdown.
> > > The intention is to make rte_eth_dev and related structures internal.
> > > That should allow future possible changes to core eth_dev structures
> > > to be transparent to the user and help to avoid ABI/API breakages.
> > >
> > > Signed-off-by: Konstantin Ananyev 
> > > ---
> > >  lib/ethdev/ethdev_private.c  | 53 
> > >  lib/ethdev/ethdev_private.h  |  7 +
> > >  lib/ethdev/rte_ethdev.c  | 17 
> > >  lib/ethdev/rte_ethdev_core.h | 45 ++
> > >  4 files changed, 122 insertions(+)
> > >
> > > diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
> > > index 012cf73ca2..a1683da77b 100644
> > > --- a/lib/ethdev/ethdev_private.c
> > > +++ b/lib/ethdev/ethdev_private.c
> > > @@ -174,3 +174,56 @@ rte_eth_devargs_parse_representor_ports(char *str, 
> > > void *data)
> > >   RTE_LOG(ERR, EAL, "wrong representor format: %s\n", str);
> > >   return str == NULL ? -1 : 0;
> > >  }
> > > +
> > > +static uint16_t
> > > +dummy_eth_rx_burst(__rte_unused void *rxq,
> > > + __rte_unused struct rte_mbuf **rx_pkts,
> > > + __rte_unused uint16_t nb_pkts)
> > > +{
> > > + RTE_ETHDEV_LOG(ERR, "rx_pkt_burst for unconfigured port\n");
> > > + rte_errno = ENOTSUP;
> > > + return 0;
> > > +}
> > > +
> > > +static uint16_t
> > > +dummy_eth_tx_burst(__rte_unused void *txq,
> > > + __rte_unused struct rte_mbuf **tx_pkts,
> > > + __rte_unused uint16_t nb_pkts)
> > > +{
> > > + RTE_ETHDEV_LOG(ERR, "tx_pkt_burst for unconfigured port\n");
> > > + rte_errno = ENOTSUP;
> > > + return 0;
> > > +}
> > > +
> > > +void
> > > +eth_dev_burst_api_reset(struct rte_eth_burst_api *rba)
> > > +{
> > > + static void *dummy_data[RTE_MAX_QUEUES_PER_PORT];
> > > + static const struct rte_eth_burst_api dummy_api = {
> > > + .rx_pkt_burst = dummy_eth_rx_burst,
> > > + .tx_pkt_burst = dummy_eth_tx_burst,
> > > + .rxq = {.data = dummy_data, .clbk = dummy_data,},
> > > + .txq = {.data = dummy_data, .clbk = dummy_data,},
> > > + };
> > > +
> > > + *rba = dummy_api;
> > > +}
> > > +
> > > +void
> > > +eth_dev_burst_api_setup(struct rte_eth_burst_api *rba,
> > > + const struct rte_eth_dev *dev)
> > > +{
> > > + rba->rx_pkt_burst = dev->rx_pkt_burst;
> > > + rba->tx_pkt_burst = dev->tx_pkt_burst;
> > > + rba->tx_pkt_prepare = dev->tx_pkt_prepare;
> > > + rba->rx_queue_count = dev->rx_queue_count;
> > > + rba->rx_descriptor_status = dev->rx_descriptor_status;
> > > + rba->tx_descriptor_status = dev->tx_descriptor_status;
> > > +
> > > + rba->rxq.data = dev->data->rx_queues;
> > > + rba->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
> > > +
> > > + rba->txq.data = dev->data->tx_queues;
> > > + rba->txq.clbk = (void **)(uintptr_t)dev->pre_tx_burst_cbs;
> > > +}
> > > +
> > > diff --git a/lib/ethdev/ethdev_private.h b/lib/ethdev/ethdev_private.h
> > > index 9bb0879538..54921f4860 100644
> > > --- a/lib/ethdev/ethdev_private.h
> > > +++ b/lib/ethdev/ethdev_private.h
> > > @@ -30,6 +30,13 @@ eth_find_device(const struct rte_eth_dev *_start, 
> > > rte_eth_cmp_t cmp,
> > >  /* Parse devargs value for representor parameter. */
> > >  int rte_eth_devargs_parse_representor_ports(char *str, void *data);
> > >
> > > +/* reset eth 'burst' API to dummy values */
> > > +void eth_dev_burst_api_reset(struct rte_eth_burst_api *rba);
> > > +
> > > +/* setup eth 'burst' API to ethdev values */
> > > +void eth_dev_burst_api_setup(struct rte_eth_burst_api *rba,
> > > + const struct rte_eth_dev *dev);
> > > +
> > >  #ifdef __cplusplus
> > >  }
> > >  #endif
> > > diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> > > index 424bc260fa..5904bb7bae 100644
> > > --- a/lib/ethdev/rte_ethdev.c
> > > +++ b/lib/ethdev/rte_ethdev.c
> > > @@ -44,6 +44,9 @@
> > >  static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
> > >  struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
> > >
> > > +/* public 'fast/burst' API */
> > > +struct rte_eth_burst_api rte_eth_burst_api[RTE_MAX_ETHPORTS];
> > > +
> > >  /* spinlock for eth device callbacks */
> > >  static rte_spinlock_t eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
> > >
> > > @@ -1788,6 +1791,9 @@ rte_eth_dev_start(uint16_t port_id)
> > >   (*dev->dev_ops->link_update)(dev, 0);
> > >   }
> > >
> > > + /* expose selection of PMD rx/tx function */
> > > + eth_dev_burst_api_setup(rte_eth_burst_api + port_id, dev);
> > > +
> > >   rte_ethdev_trace_start(port_id);
> > > 

Re: [dpdk-dev] [PATCH v2 01/15] ethdev: introduce shared Rx queue

2021-09-28 Thread Xueming(Steven) Li
On Thu, 2021-09-16 at 09:46 +0530, Jerin Jacob wrote:
> On Wed, Sep 15, 2021 at 8:15 PM Xueming(Steven) Li  
> wrote:
> > 
> > Hi Jerin,
> > 
> > On Mon, 2021-08-30 at 15:01 +0530, Jerin Jacob wrote:
> > > On Sat, Aug 28, 2021 at 7:46 PM Xueming(Steven) Li  
> > > wrote:
> > > > 
> > > > 
> > > > 
> > > > > -Original Message-
> > > > > From: Jerin Jacob 
> > > > > Sent: Thursday, August 26, 2021 7:58 PM
> > > > > To: Xueming(Steven) Li 
> > > > > Cc: dpdk-dev ; Ferruh Yigit ; 
> > > > > NBU-Contact-Thomas Monjalon ;
> > > > > Andrew Rybchenko 
> > > > > Subject: Re: [PATCH v2 01/15] ethdev: introduce shared Rx queue
> > > > > 
> > > > > On Thu, Aug 19, 2021 at 5:39 PM Xueming(Steven) Li 
> > > > >  wrote:
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > > -Original Message-
> > > > > > > From: Jerin Jacob 
> > > > > > > Sent: Thursday, August 19, 2021 1:27 PM
> > > > > > > To: Xueming(Steven) Li 
> > > > > > > Cc: dpdk-dev ; Ferruh Yigit 
> > > > > > > ;
> > > > > > > NBU-Contact-Thomas Monjalon ; Andrew 
> > > > > > > Rybchenko
> > > > > > > 
> > > > > > > Subject: Re: [PATCH v2 01/15] ethdev: introduce shared Rx queue
> > > > > > > 
> > > > > > > On Wed, Aug 18, 2021 at 4:44 PM Xueming(Steven) Li 
> > > > > > >  wrote:
> > > > > > > > 
> > > > > > > > 
> > > > > > > > 
> > > > > > > > > -Original Message-
> > > > > > > > > From: Jerin Jacob 
> > > > > > > > > Sent: Tuesday, August 17, 2021 11:12 PM
> > > > > > > > > To: Xueming(Steven) Li 
> > > > > > > > > Cc: dpdk-dev ; Ferruh Yigit
> > > > > > > > > ; NBU-Contact-Thomas Monjalon
> > > > > > > > > ; Andrew Rybchenko
> > > > > > > > > 
> > > > > > > > > Subject: Re: [PATCH v2 01/15] ethdev: introduce shared Rx 
> > > > > > > > > queue
> > > > > > > > > 
> > > > > > > > > On Tue, Aug 17, 2021 at 5:01 PM Xueming(Steven) Li 
> > > > > > > > >  wrote:
> > > > > > > > > > 
> > > > > > > > > > 
> > > > > > > > > > 
> > > > > > > > > > > -Original Message-
> > > > > > > > > > > From: Jerin Jacob 
> > > > > > > > > > > Sent: Tuesday, August 17, 2021 5:33 PM
> > > > > > > > > > > To: Xueming(Steven) Li 
> > > > > > > > > > > Cc: dpdk-dev ; Ferruh Yigit
> > > > > > > > > > > ; NBU-Contact-Thomas Monjalon
> > > > > > > > > > > ; Andrew Rybchenko
> > > > > > > > > > > 
> > > > > > > > > > > Subject: Re: [PATCH v2 01/15] ethdev: introduce shared Rx
> > > > > > > > > > > queue
> > > > > > > > > > > 
> > > > > > > > > > > On Wed, Aug 11, 2021 at 7:34 PM Xueming Li 
> > > > > > > > > > >  wrote:
> > > > > > > > > > > > 
> > > > > > > > > > > > In current DPDK framework, each RX queue is pre-loaded
> > > > > > > > > > > > with mbufs for incoming packets. When number of
> > > > > > > > > > > > representors scale out in a switch domain, the memory
> > > > > > > > > > > > consumption became significant. Most important, polling
> > > > > > > > > > > > all ports leads to high cache miss, high latency and 
> > > > > > > > > > > > low throughput.
> > > > > > > > > > > > 
> > > > > > > > > > > > This patch introduces shared RX queue. Ports with same
> > > > > > > > > > > > configuration in a switch domain could share RX queue 
> > > > > > > > > > > > set by specifying sharing group.
> > > > > > > > > > > > Polling any queue using same shared RX queue receives
> > > > > > > > > > > > packets from all member ports. Source port is 
> > > > > > > > > > > > identified by mbuf->port.
> > > > > > > > > > > > 
> > > > > > > > > > > > Port queue number in a shared group should be identical.
> > > > > > > > > > > > Queue index is
> > > > > > > > > > > > 1:1 mapped in shared group.
> > > > > > > > > > > > 
> > > > > > > > > > > > Share RX queue must be polled on single thread or core.
> > > > > > > > > > > > 
> > > > > > > > > > > > Multiple groups is supported by group ID.
> > > > > > > > > > > > 
> > > > > > > > > > > > Signed-off-by: Xueming Li 
> > > > > > > > > > > > Cc: Jerin Jacob 
> > > > > > > > > > > > ---
> > > > > > > > > > > > Rx queue object could be used as shared Rx queue object,
> > > > > > > > > > > > it's important to clear all queue control callback api 
> > > > > > > > > > > > that using queue object:
> > > > > > > > > > > > 
> > > > > > > > > > > > https://mails.dpdk.org/archives/dev/2021-July/215574.html
> > > > > > > > > > > 
> > > > > > > > > > > >  #undef RTE_RX_OFFLOAD_BIT2STR diff --git
> > > > > > > > > > > > a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h 
> > > > > > > > > > > > index
> > > > > > > > > > > > d2b27c351f..a578c9db9d 100644
> > > > > > > > > > > > --- a/lib/ethdev/rte_ethdev.h
> > > > > > > > > > > > +++ b/lib/ethdev/rte_ethdev.h
> > > > > > > > > > > > @@ -1047,6 +1047,7 @@ struct rte_eth_rxconf {
> > > > > > > > > > > > uint8_t rx_drop_en; /**< Drop packets if no 
> > > > > > > > > > > > descriptors are available. */
> > > > > > > > > > > > uint8_t rx_deferred_start; /**< Do not start 
> > > > > > > > > > > > queue with rte_eth_dev_start(). */
> > > > > > > > > > > > uint16_t rx_ns

Re: [dpdk-dev] [PATCH] net/i40e: fix remove MAC/VLAN addresses error

2021-09-28 Thread Kevin Traynor

On 28/09/2021 09:40, Robin Zhang wrote:

Firmware will return I40E_AQ_RC_ENOENT when trying to delete non-existent
MAC/VLAN addresses from the HW filtering, this should not be considered as
an Admin Queue error. But in i40e_asq_send_command, it will return
I40E_ERR_ADMIN_QUEUE_ERROR if the return value of Admin Queue command
processed by Firmware is not I40E_AQ_RC_OK or I40E_AQ_RC_EBUSY.

Use i40e_aq_remove_macvlan_v2 instead so that we can get the corresponding
Admin Queue status, and not report as an error in DPDK when Firmware
return I40E_AQ_RC_ENOENT.

Fixes: 4861cde46116 ("i40e: new poll mode driver")



Is it relevant to stable releases (20.11/19.11) with earlier firmware?
https://git.dpdk.org/dpdk/tree/doc/guides/nics/i40e.rst#n101


Signed-off-by: Robin Zhang 
---
  drivers/net/i40e/i40e_ethdev.c | 15 +++
  1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index acbe7380b1..fdc9943034 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -7036,6 +7036,7 @@ i40e_remove_macvlan_filters(struct i40e_vsi *vsi,
int ret = I40E_SUCCESS;
struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
struct i40e_aqc_remove_macvlan_element_data *req_list;
+   enum i40e_admin_queue_err aq_status;
  
  	if (filter == NULL  || total == 0)

return I40E_ERR_PARAM;
@@ -7083,11 +7084,17 @@ i40e_remove_macvlan_filters(struct i40e_vsi *vsi,
req_list[i].flags = rte_cpu_to_le_16(flags);
}
  
-		ret = i40e_aq_remove_macvlan(hw, vsi->seid, req_list,

-   actual_num, NULL);
+   ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, req_list,
+   actual_num, NULL, &aq_status);
+
if (ret != I40E_SUCCESS) {
-   PMD_DRV_LOG(ERR, "Failed to remove macvlan filter");
-   goto DONE;
+   /* Do not report as an error when firmware returns 
ENOENT */
+   if (aq_status == I40E_AQ_RC_ENOENT) {
+   ret = I40E_SUCCESS;
+   } else {
+   PMD_DRV_LOG(ERR, "Failed to remove macvlan 
filter");
+   goto DONE;
+   }
}
num += actual_num;
} while (num < total);





Re: [dpdk-dev] [RFC 01/15] eventdev: make driver interface as internal

2021-09-28 Thread Jerin Jacob
On Tue, Aug 24, 2021 at 1:10 AM  wrote:
>
> From: Pavan Nikhilesh 
>
> Mark all the driver specific functions as internal, remove
> `rte` prefix from `struct rte_eventdev_ops`.
> Remove experimental tag from internal functions.
> Remove `eventdev_pmd.h` from non-internal header files.
>
> Signed-off-by: Pavan Nikhilesh 


Seems like ethdev side there is conscious with
https://patches.dpdk.org/project/dpdk/list/?series=19084
Could you respin the version similar to
https://patches.dpdk.org/project/dpdk/list/?series=19084 or the next
version v3 from Konstantin.
Since eventdev does not have a callback, largely this series aligns
with expected output. But please align function and structure name
etc with ethdev for next series. Marking as "Changes Requested".
Thanks for the rework.


Re: [dpdk-dev] [PATCH] net/virtio: do not use PMD log type

2021-09-28 Thread Maxime Coquelin




On 9/16/21 15:25, David Marchand wrote:

Fixes: 1982462eadea ("net/virtio: add Rx free threshold setting")
Cc: sta...@dpdk.org

Signed-off-by: David Marchand 
---
  drivers/net/virtio/virtio_rxtx.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)



Reviewed-by: Maxime Coquelin 

Thanks,
Maxime



Re: [dpdk-dev] [PATCH 1/2] common/cnxk: setup nix and lbk in loop mode in 98xx

2021-09-28 Thread Jerin Jacob
On Fri, Jul 30, 2021 at 9:40 PM Harman Kalra  wrote:
>
> In case of 98xx, 2 NIX blocks and 4 LBK blocks are present. Moreover
> AF VFs are alternatively attached to NIX0 and NIX1 to ensure load
> balancing. To support loopback functionality between pairs NIX0/NIX1
> are attached to LBK1/LBK2 for transmission/reception respectively.
> But in this default configuration NIX blocks cannot receive the
> packets they sent from the same LBK, which is an important requirement
> as some ODP applications only use one AF VF for loopback functionality.
> To support this scenario, NIX0 can use LBK0 (NIX1 - LBK3) by setting a
> loop flag while making LF alloc mailbox request.
>
> Signed-off-by: Harman Kalra 

Changed the subject to : common/cnxk: support loop mode for cn98xx

Series Acked-by: Jerin Jacob 
Series applied to dpdk-next-net-mrvl/for-next-net. Thanks.

> ---
>  drivers/common/cnxk/roc_mbox.h | 1 +
>  drivers/common/cnxk/roc_nix.c  | 5 -
>  2 files changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
> index b5da931b81..75d1ff1ef3 100644
> --- a/drivers/common/cnxk/roc_mbox.h
> +++ b/drivers/common/cnxk/roc_mbox.h
> @@ -723,6 +723,7 @@ struct nix_lf_alloc_req {
> uint64_t __io rx_cfg; /* See NIX_AF_LF(0..127)_RX_CFG */
> uint64_t __io way_mask;
>  #define NIX_LF_RSS_TAG_LSB_AS_ADDER BIT_ULL(0)
> +#define NIX_LF_LBK_BLK_SEL BIT_ULL(1)
> uint64_t flags;
>  };
>
> diff --git a/drivers/common/cnxk/roc_nix.c b/drivers/common/cnxk/roc_nix.c
> index 23d508b941..1621f77fb0 100644
> --- a/drivers/common/cnxk/roc_nix.c
> +++ b/drivers/common/cnxk/roc_nix.c
> @@ -145,9 +145,12 @@ roc_nix_lf_alloc(struct roc_nix *roc_nix, uint32_t 
> nb_rxq, uint32_t nb_txq,
> req->npa_func = idev_npa_pffunc_get();
> req->sso_func = idev_sso_pffunc_get();
> req->rx_cfg = rx_cfg;
> +   if (roc_nix_is_lbk(roc_nix) && roc_nix->enable_loop &&
> +   roc_model_is_cn98xx())
> +   req->flags = NIX_LF_LBK_BLK_SEL;
>
> if (!roc_nix->rss_tag_as_xor)
> -   req->flags = NIX_LF_RSS_TAG_LSB_AS_ADDER;
> +   req->flags |= NIX_LF_RSS_TAG_LSB_AS_ADDER;
>
> rc = mbox_process_msg(mbox, (void *)&rsp);
> if (rc)
> --
> 2.18.0
>


[dpdk-dev] [PATCH v3 0/6] Add SA lifetime in security

2021-09-28 Thread Anoob Joseph
Add SA lifetime configuration in security. SA lifetime tracking can be
offloaded on supported PMDs.

SA lifetime would cover soft & hard expiry in units of number of packets and
bytes. When SA soft expiry happens, the packet is successfully processed but
with additional expiry notification. Crypto op structure, ``rte_crypto_op``
is updated to cover such notifications with lookaside protocol offloads.

SA hard expiration would cause IPsec processing to return an error.

PMDs crypto_cn10k, crypto_cn9k and crypto_octeontx2 are updated with their
respective lifetime tracking capabilities. Unit tests are added for soft and
hard expiry with number of packets.

Changes in v3:
- Removed explicit 0 setting of soft expiry configuration in
  ipsec-secgw (comment from Konstantin)

Changes in v2:
- Clear soft expiry configuration in ipsec-secgw
- Rebased on v3 of dependent series

Anoob Joseph (6):
  security: add SA lifetime configuration
  common/cnxk: support lifetime configuration
  crypto/octeontx2: add checks for life configuration
  test/crypto: add packets soft expiry tests
  test/crypto: add packets hard expiry tests
  examples/ipsec-secgw: clear soft expiry configuration

 app/test/test_cryptodev.c  | 38 +++-
 app/test/test_cryptodev_security_ipsec.c   | 40 +++--
 app/test/test_cryptodev_security_ipsec.h   |  5 +-
 .../test_cryptodev_security_ipsec_test_vectors.h   |  3 -
 doc/guides/rel_notes/deprecation.rst   |  5 --
 doc/guides/rel_notes/release_21_11.rst | 13 
 drivers/common/cnxk/cnxk_security.c| 70 ++
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c  | 48 +++
 drivers/crypto/cnxk/cn9k_ipsec.c   |  6 +-
 drivers/crypto/octeontx2/otx2_ipsec_po.h   |  6 ++
 examples/ipsec-secgw/ipsec.c   |  1 -
 examples/ipsec-secgw/ipsec.h   |  2 -
 lib/cryptodev/rte_crypto.h | 18 +-
 lib/security/rte_security.h| 28 -
 14 files changed, 249 insertions(+), 34 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH v3 1/6] security: add SA lifetime configuration

2021-09-28 Thread Anoob Joseph
Add SA lifetime configuration to register soft and hard expiry limits.
Expiry can be in units of number of packets or bytes. Crypto op
status is also updated to include new field, aux_flags, which can be
used to indicate cases such as soft expiry in case of lookaside
protocol operations.

In case of soft expiry, the packets are successfully IPsec processed but
the soft expiry would indicate that SA needs to be reconfigured. For
inline protocol capable ethdev, this would result in an eth event while
for lookaside protocol capable cryptodev, this can be communicated via
`rte_crypto_op.aux_flags` field.

In case of hard expiry, the packets will not be IPsec processed and
would result in error.

Signed-off-by: Anoob Joseph 
Acked-by: Konstantin Ananyev 

---
 .../test_cryptodev_security_ipsec_test_vectors.h   |  3 ---
 doc/guides/rel_notes/deprecation.rst   |  5 
 doc/guides/rel_notes/release_21_11.rst | 13 ++
 examples/ipsec-secgw/ipsec.c   |  2 +-
 examples/ipsec-secgw/ipsec.h   |  2 +-
 lib/cryptodev/rte_crypto.h | 18 +-
 lib/security/rte_security.h| 28 --
 7 files changed, 58 insertions(+), 13 deletions(-)

diff --git a/app/test/test_cryptodev_security_ipsec_test_vectors.h 
b/app/test/test_cryptodev_security_ipsec_test_vectors.h
index ae9cd24..38ea43d 100644
--- a/app/test/test_cryptodev_security_ipsec_test_vectors.h
+++ b/app/test/test_cryptodev_security_ipsec_test_vectors.h
@@ -98,7 +98,6 @@ struct ipsec_test_data pkt_aes_128_gcm = {
.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
.tunnel.type = RTE_SECURITY_IPSEC_TUNNEL_IPV4,
-   .esn_soft_limit = 0,
.replay_win_sz = 0,
},
 
@@ -195,7 +194,6 @@ struct ipsec_test_data pkt_aes_192_gcm = {
.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
.tunnel.type = RTE_SECURITY_IPSEC_TUNNEL_IPV4,
-   .esn_soft_limit = 0,
.replay_win_sz = 0,
},
 
@@ -295,7 +293,6 @@ struct ipsec_test_data pkt_aes_256_gcm = {
.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
.tunnel.type = RTE_SECURITY_IPSEC_TUNNEL_IPV4,
-   .esn_soft_limit = 0,
.replay_win_sz = 0,
},
 
diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 70ef45e..69fbde0 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -275,8 +275,3 @@ Deprecation Notices
 * cmdline: ``cmdline`` structure will be made opaque to hide platform-specific
   content. On Linux and FreeBSD, supported prior to DPDK 20.11,
   original structure will be kept until DPDK 21.11.
-
-* cryptodev: The structure ``rte_crypto_op`` would be updated to reduce
-  reserved bytes to 2 (from 3), and use 1 byte to indicate warnings and other
-  information from the crypto/security operation. This field will be used to
-  communicate events such as soft expiry with IPsec in lookaside mode.
diff --git a/doc/guides/rel_notes/release_21_11.rst 
b/doc/guides/rel_notes/release_21_11.rst
index eef7f79..0b7ffa5 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -147,6 +147,13 @@ API Changes
   as it is for drivers only and should be private to DPDK, and not
   installed for app use.
 
+* cryptodev: use 1 reserved byte from ``rte_crypto_op`` for aux flags
+
+  * Updated the structure ``rte_crypto_op`` to reduce reserved bytes to
+  2 (from 3), and use 1 byte to indicate warnings and other information from
+  the crypto/security operation. This field will be used to communicate events
+  such as soft expiry with IPsec in lookaside mode.
+
 
 ABI Changes
 ---
@@ -168,6 +175,12 @@ ABI Changes
   * Added IPsec SA option to disable IV generation to allow known vector
 tests as well as usage of application provided IV on supported PMDs.
 
+* security: add IPsec SA lifetime configuration
+
+  * Added IPsec SA lifetime configuration to allow applications to configure
+soft and hard SA expiry limits. Limits can be either in units of packets or
+bytes.
+
 
 Known Issues
 
diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c
index 5b032fe..4868294 100644
--- a/examples/ipsec-secgw/ipsec.c
+++ b/examples/ipsec-secgw/ipsec.c
@@ -49,7 +49,7 @@ set_ipsec_conf(struct ipsec_sa *sa, struct 
rte_security_ipsec_xform *ipsec)
}
/* TODO support for Transport */
}
-   ipsec->esn_soft_limit = IPSEC_OFFLOAD_ESN_SOFTLIMIT;
+   ipsec->life.packets_soft_limit = IPSEC_OFFLOAD_PKTS_SOFTLIMIT;
ipsec->replay_win_sz = app_sa_prm.window_size;
ipse

[dpdk-dev] [PATCH v3 2/6] common/cnxk: support lifetime configuration

2021-09-28 Thread Anoob Joseph
Add support for SA lifetime configuration. Expiry can
be either in units of octets or packets.

Also, updated cryptodev dequeue path to update crypto op result to
indicate soft expiry.

Signed-off-by: Anoob Joseph 

---
 drivers/common/cnxk/cnxk_security.c   | 70 +++
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c | 48 -
 drivers/crypto/cnxk/cn9k_ipsec.c  |  6 ++-
 3 files changed, 112 insertions(+), 12 deletions(-)

diff --git a/drivers/common/cnxk/cnxk_security.c 
b/drivers/common/cnxk/cnxk_security.c
index 4f7fd1b..215d9fd 100644
--- a/drivers/common/cnxk/cnxk_security.c
+++ b/drivers/common/cnxk/cnxk_security.c
@@ -161,6 +161,26 @@ ot_ipsec_sa_common_param_fill(union roc_ot_ipsec_sa_word2 
*w2,
return -EINVAL;
}
 
+   if (ipsec_xfrm->life.packets_soft_limit != 0 ||
+   ipsec_xfrm->life.packets_hard_limit != 0) {
+   if (ipsec_xfrm->life.bytes_soft_limit != 0 ||
+   ipsec_xfrm->life.bytes_hard_limit != 0) {
+   plt_err("Expiry tracking with both packets & bytes is 
not supported");
+   return -EINVAL;
+   }
+   w2->s.life_unit = ROC_IE_OT_SA_LIFE_UNIT_PKTS;
+   }
+
+   if (ipsec_xfrm->life.bytes_soft_limit != 0 ||
+   ipsec_xfrm->life.bytes_hard_limit != 0) {
+   if (ipsec_xfrm->life.packets_soft_limit != 0 ||
+   ipsec_xfrm->life.packets_hard_limit != 0) {
+   plt_err("Expiry tracking with both packets & bytes is 
not supported");
+   return -EINVAL;
+   }
+   w2->s.life_unit = ROC_IE_OT_SA_LIFE_UNIT_OCTETS;
+   }
+
return 0;
 }
 
@@ -236,6 +256,31 @@ cnxk_ot_ipsec_inb_sa_fill(struct roc_ot_ipsec_inb_sa *sa,
 ROC_CTX_UNIT_128B) -
1;
 
+   /**
+* CPT MC triggers expiry when counter value changes from 2 to 1. To
+* mitigate this behaviour add 1 to the life counter values provided.
+*/
+
+   if (ipsec_xfrm->life.bytes_soft_limit) {
+   sa->ctx.soft_life = ipsec_xfrm->life.bytes_soft_limit + 1;
+   sa->w0.s.soft_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.packets_soft_limit) {
+   sa->ctx.soft_life = ipsec_xfrm->life.packets_soft_limit + 1;
+   sa->w0.s.soft_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.bytes_hard_limit) {
+   sa->ctx.hard_life = ipsec_xfrm->life.bytes_hard_limit + 1;
+   sa->w0.s.hard_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.packets_hard_limit) {
+   sa->ctx.hard_life = ipsec_xfrm->life.packets_hard_limit + 1;
+   sa->w0.s.hard_life_dec = 1;
+   }
+
/* There are two words of CPT_CTX_HW_S for ucode to skip */
sa->w0.s.ctx_hdr_size = 1;
sa->w0.s.aop_valid = 1;
@@ -360,6 +405,31 @@ cnxk_ot_ipsec_outb_sa_fill(struct roc_ot_ipsec_outb_sa *sa,
/* IPID gen */
sa->w2.s.ipid_gen = 1;
 
+   /**
+* CPT MC triggers expiry when counter value changes from 2 to 1. To
+* mitigate this behaviour add 1 to the life counter values provided.
+*/
+
+   if (ipsec_xfrm->life.bytes_soft_limit) {
+   sa->ctx.soft_life = ipsec_xfrm->life.bytes_soft_limit + 1;
+   sa->w0.s.soft_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.packets_soft_limit) {
+   sa->ctx.soft_life = ipsec_xfrm->life.packets_soft_limit + 1;
+   sa->w0.s.soft_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.bytes_hard_limit) {
+   sa->ctx.hard_life = ipsec_xfrm->life.bytes_hard_limit + 1;
+   sa->w0.s.hard_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.packets_hard_limit) {
+   sa->ctx.hard_life = ipsec_xfrm->life.packets_hard_limit + 1;
+   sa->w0.s.hard_life_dec = 1;
+   }
+
/* There are two words of CPT_CTX_HW_S for ucode to skip */
sa->w0.s.ctx_hdr_size = 1;
sa->w0.s.aop_valid = 1;
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c 
b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index 3a1a4a2..3caf05a 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -348,12 +348,44 @@ cn10k_cpt_dequeue_post_process(struct cnxk_cpt_qp *qp,
   struct cpt_inflight_req *infl_req)
 {
struct cpt_cn10k_res_s *res = (struct cpt_cn10k_res_s *)&infl_req->res;
+   const uint8_t uc_compcode = res->uc_compcode;
+   const uint8_t compcode = res->compcode;
unsigned int sz;
 
-   if (likely(res->compcode == CPT_COMP_GOOD ||
-  res->compcode == CPT_COMP_WARN)) {
-   if (unlikely(res->uc_compcode)) {
-   if (res->uc_compcode == ROC_SE_ERR_GC_ICV_MISCOMPARE)
+   cop->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
+
+   

[dpdk-dev] [PATCH v3 3/6] crypto/octeontx2: add checks for life configuration

2021-09-28 Thread Anoob Joseph
Lifetime tracking is not supported by hardware and is not implemented in
software either. Return failure when lifetime is configured.

Signed-off-by: Anoob Joseph 

---
 drivers/crypto/octeontx2/otx2_ipsec_po.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/crypto/octeontx2/otx2_ipsec_po.h 
b/drivers/crypto/octeontx2/otx2_ipsec_po.h
index b3e7456..b61c5e0 100644
--- a/drivers/crypto/octeontx2/otx2_ipsec_po.h
+++ b/drivers/crypto/octeontx2/otx2_ipsec_po.h
@@ -293,6 +293,12 @@ ipsec_po_xform_verify(struct rte_security_ipsec_xform 
*ipsec,
struct rte_crypto_sym_xform *auth_xform, *cipher_xform;
int ret;
 
+   if (ipsec->life.bytes_hard_limit != 0 ||
+   ipsec->life.bytes_soft_limit != 0 ||
+   ipsec->life.packets_hard_limit != 0 ||
+   ipsec->life.packets_soft_limit != 0)
+   return -ENOTSUP;
+
if (xform->type == RTE_CRYPTO_SYM_XFORM_AEAD)
return ipsec_po_xform_aead_verify(ipsec, xform);
 
-- 
2.7.4



[dpdk-dev] [PATCH v3 4/6] test/crypto: add packets soft expiry tests

2021-09-28 Thread Anoob Joseph
Add tests to validate packets soft expiry handling.

Signed-off-by: Anoob Joseph 

---
 app/test/test_cryptodev.c| 21 +++--
 app/test/test_cryptodev_security_ipsec.c | 18 --
 app/test/test_cryptodev_security_ipsec.h |  4 +++-
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index f57a1a4..1befbeb 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -9045,7 +9045,7 @@ test_ipsec_proto_process(const struct ipsec_test_data 
td[],
/* Process crypto operation */
process_crypto_request(dev_id, ut_params->op);
 
-   ret = test_ipsec_status_check(ut_params->op, flags, dir);
+   ret = test_ipsec_status_check(ut_params->op, flags, dir, i + 1);
if (ret != TEST_SUCCESS)
goto crypto_op_free;
 
@@ -9115,7 +9115,8 @@ test_ipsec_proto_all(const struct ipsec_test_flags *flags)
unsigned int i, nb_pkts = 1, pass_cnt = 0;
int ret;
 
-   if (flags->iv_gen)
+   if (flags->iv_gen ||
+   flags->sa_expiry_pkts_soft)
nb_pkts = IPSEC_TEST_PACKETS_MAX;
 
for (i = 0; i < RTE_DIM(aead_list); i++) {
@@ -9180,6 +9181,18 @@ test_ipsec_proto_iv_gen(const void *data __rte_unused)
 }
 
 static int
+test_ipsec_proto_sa_exp_pkts_soft(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.sa_expiry_pkts_soft = true;
+
+   return test_ipsec_proto_all(&flags);
+}
+
+static int
 test_ipsec_proto_err_icv_corrupt(const void *data __rte_unused)
 {
struct ipsec_test_flags flags;
@@ -14136,6 +14149,10 @@ static struct unit_test_suite ipsec_proto_testsuite  = 
{
ut_setup_security, ut_teardown,
test_ipsec_proto_udp_encap),
TEST_CASE_NAMED_ST(
+   "SA expiry packets soft",
+   ut_setup_security, ut_teardown,
+   test_ipsec_proto_sa_exp_pkts_soft),
+   TEST_CASE_NAMED_ST(
"Negative test: ICV corruption",
ut_setup_security, ut_teardown,
test_ipsec_proto_err_icv_corrupt),
diff --git a/app/test/test_cryptodev_security_ipsec.c 
b/app/test/test_cryptodev_security_ipsec.c
index f371b15..56a44b5 100644
--- a/app/test/test_cryptodev_security_ipsec.c
+++ b/app/test/test_cryptodev_security_ipsec.c
@@ -173,6 +173,10 @@ test_ipsec_td_prepare(const struct crypto_param *param1,
 
if (flags->iv_gen)
td->ipsec_xform.options.iv_gen_disable = 0;
+
+   if (flags->sa_expiry_pkts_soft)
+   td->ipsec_xform.life.packets_soft_limit =
+   IPSEC_TEST_PACKETS_MAX - 1;
}
 
RTE_SET_USED(param2);
@@ -395,7 +399,8 @@ test_ipsec_post_process(struct rte_mbuf *m, const struct 
ipsec_test_data *td,
 int
 test_ipsec_status_check(struct rte_crypto_op *op,
const struct ipsec_test_flags *flags,
-   enum rte_security_ipsec_sa_direction dir)
+   enum rte_security_ipsec_sa_direction dir,
+   int pkt_num)
 {
int ret = TEST_SUCCESS;
 
@@ -406,7 +411,16 @@ test_ipsec_status_check(struct rte_crypto_op *op,
}
} else {
if (op->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
-   printf("Security op processing failed\n");
+   printf("Security op processing failed [pkt_num: %d]\n",
+  pkt_num);
+   ret = TEST_FAILED;
+   }
+   }
+
+   if (flags->sa_expiry_pkts_soft && pkt_num == IPSEC_TEST_PACKETS_MAX) {
+   if (!(op->aux_flags &
+ RTE_CRYPTO_OP_AUX_FLAGS_IPSEC_SOFT_EXPIRY)) {
+   printf("SA soft expiry (pkts) test failed\n");
ret = TEST_FAILED;
}
}
diff --git a/app/test/test_cryptodev_security_ipsec.h 
b/app/test/test_cryptodev_security_ipsec.h
index e1645f4..eed3476 100644
--- a/app/test/test_cryptodev_security_ipsec.h
+++ b/app/test/test_cryptodev_security_ipsec.h
@@ -49,6 +49,7 @@ struct ipsec_test_data {
 
 struct ipsec_test_flags {
bool display_alg;
+   bool sa_expiry_pkts_soft;
bool icv_corrupt;
bool iv_gen;
bool udp_encap;
@@ -114,6 +115,7 @@ int test_ipsec_post_process(struct rte_mbuf *m,
 
 int test_ipsec_status_check(struct rte_crypto_op *op,
const struct ipsec_test_flags *flags,
-   enum rte_security_ipsec_sa_direction dir);
+   enum rte_security_ipsec_sa_direction dir,
+   int pkt_num);
 
 #endif
-- 
2.7.4



[dpdk-dev] [PATCH v3 5/6] test/crypto: add packets hard expiry tests

2021-09-28 Thread Anoob Joseph
Add tests to validate packets hard expiry handling.

Signed-off-by: Anoob Joseph 

---
 app/test/test_cryptodev.c| 19 ++-
 app/test/test_cryptodev_security_ipsec.c | 22 +++---
 app/test/test_cryptodev_security_ipsec.h |  1 +
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 1befbeb..34b55a9 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -9116,7 +9116,8 @@ test_ipsec_proto_all(const struct ipsec_test_flags *flags)
int ret;
 
if (flags->iv_gen ||
-   flags->sa_expiry_pkts_soft)
+   flags->sa_expiry_pkts_soft ||
+   flags->sa_expiry_pkts_hard)
nb_pkts = IPSEC_TEST_PACKETS_MAX;
 
for (i = 0; i < RTE_DIM(aead_list); i++) {
@@ -9193,6 +9194,18 @@ test_ipsec_proto_sa_exp_pkts_soft(const void *data 
__rte_unused)
 }
 
 static int
+test_ipsec_proto_sa_exp_pkts_hard(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.sa_expiry_pkts_hard = true;
+
+   return test_ipsec_proto_all(&flags);
+}
+
+static int
 test_ipsec_proto_err_icv_corrupt(const void *data __rte_unused)
 {
struct ipsec_test_flags flags;
@@ -14153,6 +14166,10 @@ static struct unit_test_suite ipsec_proto_testsuite  = 
{
ut_setup_security, ut_teardown,
test_ipsec_proto_sa_exp_pkts_soft),
TEST_CASE_NAMED_ST(
+   "SA expiry packets hard",
+   ut_setup_security, ut_teardown,
+   test_ipsec_proto_sa_exp_pkts_hard),
+   TEST_CASE_NAMED_ST(
"Negative test: ICV corruption",
ut_setup_security, ut_teardown,
test_ipsec_proto_err_icv_corrupt),
diff --git a/app/test/test_cryptodev_security_ipsec.c 
b/app/test/test_cryptodev_security_ipsec.c
index 56a44b5..046536c 100644
--- a/app/test/test_cryptodev_security_ipsec.c
+++ b/app/test/test_cryptodev_security_ipsec.c
@@ -200,6 +200,10 @@ test_ipsec_td_update(struct ipsec_test_data td_inb[],
td_inb[i].input_text.data[icv_pos] += 1;
}
 
+   if (flags->sa_expiry_pkts_hard)
+   td_inb[i].ipsec_xform.life.packets_hard_limit =
+   IPSEC_TEST_PACKETS_MAX - 1;
+
if (flags->udp_encap)
td_inb[i].ipsec_xform.options.udp_encap = 1;
 
@@ -285,9 +289,10 @@ test_ipsec_td_verify(struct rte_mbuf *m, const struct 
ipsec_test_data *td,
uint8_t *output_text = rte_pktmbuf_mtod(m, uint8_t *);
uint32_t skip, len = rte_pktmbuf_pkt_len(m);
 
-   /* For negative tests, no need to do verification */
-   if (flags->icv_corrupt &&
-   td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS)
+   /* For tests with status as error for test success, skip verification */
+   if (td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS &&
+   (flags->icv_corrupt ||
+flags->sa_expiry_pkts_hard))
return TEST_SUCCESS;
 
if (td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS &&
@@ -404,6 +409,17 @@ test_ipsec_status_check(struct rte_crypto_op *op,
 {
int ret = TEST_SUCCESS;
 
+   if (dir == RTE_SECURITY_IPSEC_SA_DIR_INGRESS &&
+   flags->sa_expiry_pkts_hard &&
+   pkt_num == IPSEC_TEST_PACKETS_MAX) {
+   if (op->status != RTE_CRYPTO_OP_STATUS_ERROR) {
+   printf("SA hard expiry (pkts) test failed\n");
+   return TEST_FAILED;
+   } else {
+   return TEST_SUCCESS;
+   }
+   }
+
if (dir == RTE_SECURITY_IPSEC_SA_DIR_INGRESS && flags->icv_corrupt) {
if (op->status != RTE_CRYPTO_OP_STATUS_ERROR) {
printf("ICV corruption test case failed\n");
diff --git a/app/test/test_cryptodev_security_ipsec.h 
b/app/test/test_cryptodev_security_ipsec.h
index eed3476..18f3c64 100644
--- a/app/test/test_cryptodev_security_ipsec.h
+++ b/app/test/test_cryptodev_security_ipsec.h
@@ -50,6 +50,7 @@ struct ipsec_test_data {
 struct ipsec_test_flags {
bool display_alg;
bool sa_expiry_pkts_soft;
+   bool sa_expiry_pkts_hard;
bool icv_corrupt;
bool iv_gen;
bool udp_encap;
-- 
2.7.4



[dpdk-dev] [PATCH v3 6/6] examples/ipsec-secgw: clear soft expiry configuration

2021-09-28 Thread Anoob Joseph
Soft expiry is not a mandatory IPsec feature. It is verified separately
with IPsec unit tests. So configuration of the same is not required.
Also, soft expiry tracking can cause perf degradation with some PMDs.
Since a separate UT is available and the same setting in ipsec-secgw is
not verifying the functionality, remove the same by clearing life
configuration.

Signed-off-by: Anoob Joseph 

---
 examples/ipsec-secgw/ipsec.c | 1 -
 examples/ipsec-secgw/ipsec.h | 2 --
 2 files changed, 3 deletions(-)

diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c
index 4868294..6817139 100644
--- a/examples/ipsec-secgw/ipsec.c
+++ b/examples/ipsec-secgw/ipsec.c
@@ -49,7 +49,6 @@ set_ipsec_conf(struct ipsec_sa *sa, struct 
rte_security_ipsec_xform *ipsec)
}
/* TODO support for Transport */
}
-   ipsec->life.packets_soft_limit = IPSEC_OFFLOAD_PKTS_SOFTLIMIT;
ipsec->replay_win_sz = app_sa_prm.window_size;
ipsec->options.esn = app_sa_prm.enable_esn;
ipsec->options.udp_encap = sa->udp_encap;
diff --git a/examples/ipsec-secgw/ipsec.h b/examples/ipsec-secgw/ipsec.h
index 90c81c1..8405c48 100644
--- a/examples/ipsec-secgw/ipsec.h
+++ b/examples/ipsec-secgw/ipsec.h
@@ -23,8 +23,6 @@
 
 #define MAX_DIGEST_SIZE 32 /* Bytes -- 256 bits */
 
-#define IPSEC_OFFLOAD_PKTS_SOFTLIMIT 0xff00
-
 #define IV_OFFSET  (sizeof(struct rte_crypto_op) + \
sizeof(struct rte_crypto_sym_op))
 
-- 
2.7.4



Re: [dpdk-dev] [PATCH v2] net/virtio: fix virtio-user init when using existing tap

2021-09-28 Thread Maxime Coquelin




On 9/28/21 10:51, David Marchand wrote:

When attaching to an existing mono queue tap, the virtio-user was not
reporting that the virtio device was not properly initialised, which
prevented the port from being started later.

$ ip tuntap add test mode tap
$ dpdk-testpmd --vdev \
   net_virtio_user0,iface=test,path=/dev/vhost-net,queues=2 -- -i

...
virtio_user_dev_init_mac(): (/dev/vhost-net) No valid MAC in devargs or
device, use random
vhost_kernel_open_tap(): TUNSETIFF failed: Invalid argument
vhost_kernel_enable_queue_pair(): fail to open tap for vhost kernel
virtio_user_start_device(): (/dev/vhost-net) Failed to start device
...
Configuring Port 0 (socket 0)
vhost_kernel_open_tap(): TUNSETIFF failed: Invalid argument
vhost_kernel_enable_queue_pair(): fail to open tap for vhost kernel
virtio_set_multiple_queues(): Multiqueue configured but send command
failed, this is too late now...
Fail to start port 0: Invalid argument
Please stop the ports first
Done

The virtio-user with vhost-kernel backend was going through a lot
of complications to initialise tap fds only when using them.

For each qp enabled for the first time, a tapfd was created via
TUNSETIFF with unneeded additional steps (see below) and then mapped to
the right qp in the vhost-net backend.
Unneeded steps (as long as it has been done once for the port):
- tap features were queried while this is a constant on a running
   system,
- the device name in DPDK was updated,
- the mac address of the tap was set,

On subsequent qps state change, the vhost-net backend fd mapping was
updated and the associated queue/tapfd were disabled/enabled via
TUNSETQUEUE.

Now, this patch simplifies the whole logic by keeping all tapfds opened
and in enabled state (from the tap point of view) at all time.

Unused ioctl defines are removed.

Tap features are validated earlier to fail initialisation asap.
Tap name discovery and mac address configuration are moved when
configuring qp 0.

To support attaching to mono queue tap, the virtio-user driver now tries
to attach in multi queue first, then fallbacks to mono queue.

Finally (but this is more for consistency), VIRTIO_NET_F_MQ feature is
exposed only if the underlying tap supports multi queue.

Signed-off-by: David Marchand 
---
Changes since v1:
- refactored tap_open() following Olivier comment and updated log
   messages level accordingly,
- added more error logs,

---

  drivers/net/virtio/virtio_user/vhost_kernel.c |  92 +
  .../net/virtio/virtio_user/vhost_kernel_tap.c | 180 +-
  .../net/virtio/virtio_user/vhost_kernel_tap.h |  16 +-
  3 files changed, 153 insertions(+), 135 deletions(-)



Nice, thanks for the detailed commit message.

Reviewed-by: Maxime Coquelin 

Thanks,
Maxime



Re: [dpdk-dev] [PATCH] net/i40e: fix remove MAC/VLAN addresses error

2021-09-28 Thread Zhang, RobinX
Hi,

> -Original Message-
> From: Kevin Traynor 
> Sent: Tuesday, September 28, 2021 5:54 PM
> To: Zhang, RobinX ; dev@dpdk.org
> Cc: Xing, Beilei ; Guo, Junfeng
> ; Yang, SteveX 
> Subject: Re: [dpdk-dev] [PATCH] net/i40e: fix remove MAC/VLAN addresses
> error
> 
> On 28/09/2021 09:40, Robin Zhang wrote:
> > Firmware will return I40E_AQ_RC_ENOENT when try to delete non-
> existent
> > MAC/VLAN addresses from the HW filtering, this should not be
> > considered as an Admin Queue error. But in i40e_asq_send_command, it
> > will return I40E_ERR_ADMIN_QUEUE_ERROR if the return value of Admin
> > Queue command processed by Firmware is not I40E_AQ_RC_OK or
> I40E_AQ_RC_EBUSY.
> >
> > Use i40e_aq_remove_macvlan_v2 instead so that we can get the
> > corresponding Admin Queue status, and not report as an error in DPDK
> > when Firmware return I40E_AQ_RC_ENOENT.
> >
> > Fixes: 4861cde46116 ("i40e: new poll mode driver")
> >
> 
> Is it relevant to stable releases (20.11/19.11) with earlier firmware?
> https://git.dpdk.org/dpdk/tree/doc/guides/nics/i40e.rst#n101
> 

No, i40e_aq_remove_macvlan_v2 is added in latest i40e share code.
So this patch cannot cc stable either.

> > Signed-off-by: Robin Zhang 
> > ---
> >   drivers/net/i40e/i40e_ethdev.c | 15 +++
> >   1 file changed, 11 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index acbe7380b1..fdc9943034 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -7036,6 +7036,7 @@ i40e_remove_macvlan_filters(struct i40e_vsi *vsi,
> > int ret = I40E_SUCCESS;
> > struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
> > struct i40e_aqc_remove_macvlan_element_data *req_list;
> > +   enum i40e_admin_queue_err aq_status;
> >
> > if (filter == NULL  || total == 0)
> > return I40E_ERR_PARAM;
> > @@ -7083,11 +7084,17 @@ i40e_remove_macvlan_filters(struct i40e_vsi
> *vsi,
> > req_list[i].flags = rte_cpu_to_le_16(flags);
> > }
> >
> > -   ret = i40e_aq_remove_macvlan(hw, vsi->seid, req_list,
> > -   actual_num, NULL);
> > +   ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, req_list,
> > +   actual_num, NULL,
> &aq_status);
> > +
> > if (ret != I40E_SUCCESS) {
> > -   PMD_DRV_LOG(ERR, "Failed to remove macvlan
> filter");
> > -   goto DONE;
> > +   /* Do not report as an error when firmware returns
> ENOENT */
> > +   if (aq_status == I40E_AQ_RC_ENOENT) {
> > +   ret = I40E_SUCCESS;
> > +   } else {
> > +   PMD_DRV_LOG(ERR, "Failed to remove
> macvlan filter");
> > +   goto DONE;
> > +   }
> > }
> > num += actual_num;
> > } while (num < total);
> >



Re: [dpdk-dev] [PATCH v7 0/2] update gtp psc to use net hdr

2021-09-28 Thread Ferruh Yigit
On 8/23/2021 11:55 AM, Raslan Darawsheh wrote:
> This series add header definition for gtp_psc in the net
> library, and update the relevant rte_flow_item gtp_psc to
> use this new header definition.
> 
> Raslan Darawsheh (2):
>   net: add new ext hdr for gtp psc
>   ethdev: use ext hdr for gtp psc item
> 

Rebased on top latest next-net, need to update 'iavf_fdir.c'.

Series applied to dpdk-next-net/main, thanks.


Re: [dpdk-dev] [EXT] [PATCH] cryptodev: add telemetry callbacks

2021-09-28 Thread Akhil Goyal
> The cryptodev library now registers commands with telemetry, and
> implements the corresponding callback functions. These commands
> allow a list of cryptodevs and stats for a cryptodev to be
> queried.
> 
> An example usage can be seen below:
> 
> Connecting to /var/run/dpdk/rte/dpdk_telemetry.v2
> {"version": "DPDK 21.11.0-rc0", "pid": 1135019, "max_output_len": 16384}
> --> /
> {"/": ["/", "/cryptodev/list", "/cryptodev/stats", ...]}
> --> /cryptodev/list
> {"/cryptodev/list": {":1a:01.0_qat_sym": 0, ":1a:01.0_qat_asym": \
>   1}}
> --> /cryptodev/stats,0
> {"/cryptodev/stats": {"enqueued_count": 0, "dequeued_count": 0, \
>   "enqueue_err_count": 0, "dequeue_err_count": 0}}
> 
> Signed-off-by: Rebecca Troy 
> ---
Can we add documentation in cryptodevs.rst?


Re: [dpdk-dev] [EXT] [PATCH] cryptodev: add telemetry callbacks

2021-09-28 Thread Akhil Goyal
> The cryptodev library now registers commands with telemetry, and
> implements the corresponding callback functions. These commands
> allow a list of cryptodevs and stats for a cryptodev to be
> queried.
> 
> An example usage can be seen below:
> 
> Connecting to /var/run/dpdk/rte/dpdk_telemetry.v2
> {"version": "DPDK 21.11.0-rc0", "pid": 1135019, "max_output_len": 16384}
> --> /
> {"/": ["/", "/cryptodev/list", "/cryptodev/stats", ...]}
> --> /cryptodev/list
> {"/cryptodev/list": {":1a:01.0_qat_sym": 0, ":1a:01.0_qat_asym": \
>   1}}
> --> /cryptodev/stats,0
> {"/cryptodev/stats": {"enqueued_count": 0, "dequeued_count": 0, \
>   "enqueue_err_count": 0, "dequeue_err_count": 0}}
> 
> Signed-off-by: Rebecca Troy 
> ---
>  lib/cryptodev/rte_cryptodev.c | 62
> +++
Release notes also missing.


[dpdk-dev] [PATCH v4 0/6] Add SA lifetime in security

2021-09-28 Thread Anoob Joseph
Add SA lifetime configuration in security. SA lifetime tracking can be
offloaded on supported PMDs.

SA lifetime would cover soft & hard expiry in units of number of packets and
bytes. When SA soft expiry happens, the packet is successfully processed but
with additional expiry notification. Crypto op structure, ``rte_crypto_op``
is updated to cover such notifications with lookaside protocol offloads.

SA hard expiration would cause IPsec processing to return an error.

PMDs crypto_cn10k, crypto_cn9k and crypto_octeontx2 are updated with their
respective lifetime tracking capabilities. Unit tests are added for soft and
hard expiry with number of packets.

Changes in v4:
- Removed extra comments around auxiliary flag macros
  (comment from Konstantin)

Changes in v3:
- Removed explicit 0 setting of soft expiry configuration in
  ipsec-secgw (comment from Konstantin)

Changes in v2:
- Clear soft expiry configuration in ipsec-secgw
- Rebased on v3 of dependent series

Anoob Joseph (6):
  security: add SA lifetime configuration
  common/cnxk: support lifetime configuration
  crypto/octeontx2: add checks for life configuration
  test/crypto: add packets soft expiry cases
  test/crypto: add packets hard expiry cases
  examples/ipsec-secgw: clear soft expiry configuration

 app/test/test_cryptodev.c  | 38 +++-
 app/test/test_cryptodev_security_ipsec.c   | 40 +++--
 app/test/test_cryptodev_security_ipsec.h   |  5 +-
 .../test_cryptodev_security_ipsec_test_vectors.h   |  3 -
 doc/guides/rel_notes/deprecation.rst   |  5 --
 doc/guides/rel_notes/release_21_11.rst | 13 
 drivers/common/cnxk/cnxk_security.c| 70 ++
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c  | 48 +++
 drivers/crypto/cnxk/cn9k_ipsec.c   |  6 +-
 drivers/crypto/octeontx2/otx2_ipsec_po.h   |  6 ++
 examples/ipsec-secgw/ipsec.c   |  1 -
 examples/ipsec-secgw/ipsec.h   |  2 -
 lib/cryptodev/rte_crypto.h | 12 +++-
 lib/security/rte_security.h| 28 -
 14 files changed, 243 insertions(+), 34 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH v4 1/6] security: add SA lifetime configuration

2021-09-28 Thread Anoob Joseph
Add SA lifetime configuration to register soft and hard expiry limits.
Expiry can be in units of number of packets or bytes. Crypto op
status is also updated to include a new field, aux_flags, which can be
used to indicate cases such as soft expiry in case of lookaside
protocol operations.

In case of soft expiry, the packets are successfully IPsec processed but
the soft expiry would indicate that SA needs to be reconfigured. For
inline protocol capable ethdev, this would result in an eth event while
for lookaside protocol capable cryptodev, this can be communicated via
`rte_crypto_op.aux_flags` field.

In case of hard expiry, the packets will not be IPsec processed and
would result in error.

Signed-off-by: Anoob Joseph 
Acked-by: Konstantin Ananyev 

---
 .../test_cryptodev_security_ipsec_test_vectors.h   |  3 ---
 doc/guides/rel_notes/deprecation.rst   |  5 
 doc/guides/rel_notes/release_21_11.rst | 13 ++
 examples/ipsec-secgw/ipsec.c   |  2 +-
 examples/ipsec-secgw/ipsec.h   |  2 +-
 lib/cryptodev/rte_crypto.h | 12 +-
 lib/security/rte_security.h| 28 --
 7 files changed, 52 insertions(+), 13 deletions(-)

diff --git a/app/test/test_cryptodev_security_ipsec_test_vectors.h 
b/app/test/test_cryptodev_security_ipsec_test_vectors.h
index ae9cd24..38ea43d 100644
--- a/app/test/test_cryptodev_security_ipsec_test_vectors.h
+++ b/app/test/test_cryptodev_security_ipsec_test_vectors.h
@@ -98,7 +98,6 @@ struct ipsec_test_data pkt_aes_128_gcm = {
.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
.tunnel.type = RTE_SECURITY_IPSEC_TUNNEL_IPV4,
-   .esn_soft_limit = 0,
.replay_win_sz = 0,
},
 
@@ -195,7 +194,6 @@ struct ipsec_test_data pkt_aes_192_gcm = {
.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
.tunnel.type = RTE_SECURITY_IPSEC_TUNNEL_IPV4,
-   .esn_soft_limit = 0,
.replay_win_sz = 0,
},
 
@@ -295,7 +293,6 @@ struct ipsec_test_data pkt_aes_256_gcm = {
.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
.tunnel.type = RTE_SECURITY_IPSEC_TUNNEL_IPV4,
-   .esn_soft_limit = 0,
.replay_win_sz = 0,
},
 
diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 70ef45e..69fbde0 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -275,8 +275,3 @@ Deprecation Notices
 * cmdline: ``cmdline`` structure will be made opaque to hide platform-specific
   content. On Linux and FreeBSD, supported prior to DPDK 20.11,
   original structure will be kept until DPDK 21.11.
-
-* cryptodev: The structure ``rte_crypto_op`` would be updated to reduce
-  reserved bytes to 2 (from 3), and use 1 byte to indicate warnings and other
-  information from the crypto/security operation. This field will be used to
-  communicate events such as soft expiry with IPsec in lookaside mode.
diff --git a/doc/guides/rel_notes/release_21_11.rst 
b/doc/guides/rel_notes/release_21_11.rst
index c93cc20..114631e 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -152,6 +152,13 @@ API Changes
   as it is for drivers only and should be private to DPDK, and not
   installed for app use.
 
+* cryptodev: use 1 reserved byte from ``rte_crypto_op`` for aux flags
+
+  * Updated the structure ``rte_crypto_op`` to reduce reserved bytes to
+  2 (from 3), and use 1 byte to indicate warnings and other information from
+  the crypto/security operation. This field will be used to communicate events
+  such as soft expiry with IPsec in lookaside mode.
+
 
 ABI Changes
 ---
@@ -174,6 +181,12 @@ ABI Changes
   have much processing in PMD specific callbacks but just 64-bit set/get.
   This avoids a per pkt function pointer jump overhead for such PMD's.
 
+* security: add IPsec SA lifetime configuration
+
+  * Added IPsec SA lifetime configuration to allow applications to configure
+soft and hard SA expiry limits. Limits can be either in units of packets or
+bytes.
+
 
 Known Issues
 
diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c
index 5b032fe..4868294 100644
--- a/examples/ipsec-secgw/ipsec.c
+++ b/examples/ipsec-secgw/ipsec.c
@@ -49,7 +49,7 @@ set_ipsec_conf(struct ipsec_sa *sa, struct 
rte_security_ipsec_xform *ipsec)
}
/* TODO support for Transport */
}
-   ipsec->esn_soft_limit = IPSEC_OFFLOAD_ESN_SOFTLIMIT;
+   ipsec->life.packets_soft_limit = IPSEC_OFFLOAD_PKTS_SOFTLIMIT;
ipsec->replay_win_sz = app_sa_prm.window_size;
ipsec->op

[dpdk-dev] [PATCH v4 2/6] common/cnxk: support lifetime configuration

2021-09-28 Thread Anoob Joseph
Add support for SA lifetime configuration. Expiry can
be either in units of octets or packets.

Also, updated cryptodev dequeue path to update crypto op result to
indicate soft expiry.

Signed-off-by: Anoob Joseph 

---
 drivers/common/cnxk/cnxk_security.c   | 70 +++
 drivers/crypto/cnxk/cn10k_cryptodev_ops.c | 48 -
 drivers/crypto/cnxk/cn9k_ipsec.c  |  6 ++-
 3 files changed, 112 insertions(+), 12 deletions(-)

diff --git a/drivers/common/cnxk/cnxk_security.c 
b/drivers/common/cnxk/cnxk_security.c
index 4f7fd1b..215d9fd 100644
--- a/drivers/common/cnxk/cnxk_security.c
+++ b/drivers/common/cnxk/cnxk_security.c
@@ -161,6 +161,26 @@ ot_ipsec_sa_common_param_fill(union roc_ot_ipsec_sa_word2 
*w2,
return -EINVAL;
}
 
+   if (ipsec_xfrm->life.packets_soft_limit != 0 ||
+   ipsec_xfrm->life.packets_hard_limit != 0) {
+   if (ipsec_xfrm->life.bytes_soft_limit != 0 ||
+   ipsec_xfrm->life.bytes_hard_limit != 0) {
+   plt_err("Expiry tracking with both packets & bytes is 
not supported");
+   return -EINVAL;
+   }
+   w2->s.life_unit = ROC_IE_OT_SA_LIFE_UNIT_PKTS;
+   }
+
+   if (ipsec_xfrm->life.bytes_soft_limit != 0 ||
+   ipsec_xfrm->life.bytes_hard_limit != 0) {
+   if (ipsec_xfrm->life.packets_soft_limit != 0 ||
+   ipsec_xfrm->life.packets_hard_limit != 0) {
+   plt_err("Expiry tracking with both packets & bytes is 
not supported");
+   return -EINVAL;
+   }
+   w2->s.life_unit = ROC_IE_OT_SA_LIFE_UNIT_OCTETS;
+   }
+
return 0;
 }
 
@@ -236,6 +256,31 @@ cnxk_ot_ipsec_inb_sa_fill(struct roc_ot_ipsec_inb_sa *sa,
 ROC_CTX_UNIT_128B) -
1;
 
+   /**
+* CPT MC triggers expiry when counter value changes from 2 to 1. To
+* mitigate this behaviour add 1 to the life counter values provided.
+*/
+
+   if (ipsec_xfrm->life.bytes_soft_limit) {
+   sa->ctx.soft_life = ipsec_xfrm->life.bytes_soft_limit + 1;
+   sa->w0.s.soft_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.packets_soft_limit) {
+   sa->ctx.soft_life = ipsec_xfrm->life.packets_soft_limit + 1;
+   sa->w0.s.soft_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.bytes_hard_limit) {
+   sa->ctx.hard_life = ipsec_xfrm->life.bytes_hard_limit + 1;
+   sa->w0.s.hard_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.packets_hard_limit) {
+   sa->ctx.hard_life = ipsec_xfrm->life.packets_hard_limit + 1;
+   sa->w0.s.hard_life_dec = 1;
+   }
+
/* There are two words of CPT_CTX_HW_S for ucode to skip */
sa->w0.s.ctx_hdr_size = 1;
sa->w0.s.aop_valid = 1;
@@ -360,6 +405,31 @@ cnxk_ot_ipsec_outb_sa_fill(struct roc_ot_ipsec_outb_sa *sa,
/* IPID gen */
sa->w2.s.ipid_gen = 1;
 
+   /**
+* CPT MC triggers expiry when counter value changes from 2 to 1. To
+* mitigate this behaviour add 1 to the life counter values provided.
+*/
+
+   if (ipsec_xfrm->life.bytes_soft_limit) {
+   sa->ctx.soft_life = ipsec_xfrm->life.bytes_soft_limit + 1;
+   sa->w0.s.soft_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.packets_soft_limit) {
+   sa->ctx.soft_life = ipsec_xfrm->life.packets_soft_limit + 1;
+   sa->w0.s.soft_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.bytes_hard_limit) {
+   sa->ctx.hard_life = ipsec_xfrm->life.bytes_hard_limit + 1;
+   sa->w0.s.hard_life_dec = 1;
+   }
+
+   if (ipsec_xfrm->life.packets_hard_limit) {
+   sa->ctx.hard_life = ipsec_xfrm->life.packets_hard_limit + 1;
+   sa->w0.s.hard_life_dec = 1;
+   }
+
/* There are two words of CPT_CTX_HW_S for ucode to skip */
sa->w0.s.ctx_hdr_size = 1;
sa->w0.s.aop_valid = 1;
diff --git a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c 
b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
index 3a1a4a2..3caf05a 100644
--- a/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
+++ b/drivers/crypto/cnxk/cn10k_cryptodev_ops.c
@@ -348,12 +348,44 @@ cn10k_cpt_dequeue_post_process(struct cnxk_cpt_qp *qp,
   struct cpt_inflight_req *infl_req)
 {
struct cpt_cn10k_res_s *res = (struct cpt_cn10k_res_s *)&infl_req->res;
+   const uint8_t uc_compcode = res->uc_compcode;
+   const uint8_t compcode = res->compcode;
unsigned int sz;
 
-   if (likely(res->compcode == CPT_COMP_GOOD ||
-  res->compcode == CPT_COMP_WARN)) {
-   if (unlikely(res->uc_compcode)) {
-   if (res->uc_compcode == ROC_SE_ERR_GC_ICV_MISCOMPARE)
+   cop->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
+
+   

[dpdk-dev] [PATCH v4 3/6] crypto/octeontx2: add checks for life configuration

2021-09-28 Thread Anoob Joseph
Lifetime tracking is not supported by hardware and is not implemented in
software either. Return failure when lifetime is configured.

Signed-off-by: Anoob Joseph 

---
 drivers/crypto/octeontx2/otx2_ipsec_po.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/crypto/octeontx2/otx2_ipsec_po.h 
b/drivers/crypto/octeontx2/otx2_ipsec_po.h
index b3e7456..b61c5e0 100644
--- a/drivers/crypto/octeontx2/otx2_ipsec_po.h
+++ b/drivers/crypto/octeontx2/otx2_ipsec_po.h
@@ -293,6 +293,12 @@ ipsec_po_xform_verify(struct rte_security_ipsec_xform 
*ipsec,
struct rte_crypto_sym_xform *auth_xform, *cipher_xform;
int ret;
 
+   if (ipsec->life.bytes_hard_limit != 0 ||
+   ipsec->life.bytes_soft_limit != 0 ||
+   ipsec->life.packets_hard_limit != 0 ||
+   ipsec->life.packets_soft_limit != 0)
+   return -ENOTSUP;
+
if (xform->type == RTE_CRYPTO_SYM_XFORM_AEAD)
return ipsec_po_xform_aead_verify(ipsec, xform);
 
-- 
2.7.4



[dpdk-dev] [PATCH v4 4/6] test/crypto: add packets soft expiry cases

2021-09-28 Thread Anoob Joseph
Add tests to validate packets soft expiry handling.

Signed-off-by: Anoob Joseph 

---
 app/test/test_cryptodev.c| 21 +++--
 app/test/test_cryptodev_security_ipsec.c | 18 --
 app/test/test_cryptodev_security_ipsec.h |  4 +++-
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index f57a1a4..1befbeb 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -9045,7 +9045,7 @@ test_ipsec_proto_process(const struct ipsec_test_data 
td[],
/* Process crypto operation */
process_crypto_request(dev_id, ut_params->op);
 
-   ret = test_ipsec_status_check(ut_params->op, flags, dir);
+   ret = test_ipsec_status_check(ut_params->op, flags, dir, i + 1);
if (ret != TEST_SUCCESS)
goto crypto_op_free;
 
@@ -9115,7 +9115,8 @@ test_ipsec_proto_all(const struct ipsec_test_flags *flags)
unsigned int i, nb_pkts = 1, pass_cnt = 0;
int ret;
 
-   if (flags->iv_gen)
+   if (flags->iv_gen ||
+   flags->sa_expiry_pkts_soft)
nb_pkts = IPSEC_TEST_PACKETS_MAX;
 
for (i = 0; i < RTE_DIM(aead_list); i++) {
@@ -9180,6 +9181,18 @@ test_ipsec_proto_iv_gen(const void *data __rte_unused)
 }
 
 static int
+test_ipsec_proto_sa_exp_pkts_soft(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.sa_expiry_pkts_soft = true;
+
+   return test_ipsec_proto_all(&flags);
+}
+
+static int
 test_ipsec_proto_err_icv_corrupt(const void *data __rte_unused)
 {
struct ipsec_test_flags flags;
@@ -14136,6 +14149,10 @@ static struct unit_test_suite ipsec_proto_testsuite  = 
{
ut_setup_security, ut_teardown,
test_ipsec_proto_udp_encap),
TEST_CASE_NAMED_ST(
+   "SA expiry packets soft",
+   ut_setup_security, ut_teardown,
+   test_ipsec_proto_sa_exp_pkts_soft),
+   TEST_CASE_NAMED_ST(
"Negative test: ICV corruption",
ut_setup_security, ut_teardown,
test_ipsec_proto_err_icv_corrupt),
diff --git a/app/test/test_cryptodev_security_ipsec.c 
b/app/test/test_cryptodev_security_ipsec.c
index f371b15..56a44b5 100644
--- a/app/test/test_cryptodev_security_ipsec.c
+++ b/app/test/test_cryptodev_security_ipsec.c
@@ -173,6 +173,10 @@ test_ipsec_td_prepare(const struct crypto_param *param1,
 
if (flags->iv_gen)
td->ipsec_xform.options.iv_gen_disable = 0;
+
+   if (flags->sa_expiry_pkts_soft)
+   td->ipsec_xform.life.packets_soft_limit =
+   IPSEC_TEST_PACKETS_MAX - 1;
}
 
RTE_SET_USED(param2);
@@ -395,7 +399,8 @@ test_ipsec_post_process(struct rte_mbuf *m, const struct 
ipsec_test_data *td,
 int
 test_ipsec_status_check(struct rte_crypto_op *op,
const struct ipsec_test_flags *flags,
-   enum rte_security_ipsec_sa_direction dir)
+   enum rte_security_ipsec_sa_direction dir,
+   int pkt_num)
 {
int ret = TEST_SUCCESS;
 
@@ -406,7 +411,16 @@ test_ipsec_status_check(struct rte_crypto_op *op,
}
} else {
if (op->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
-   printf("Security op processing failed\n");
+   printf("Security op processing failed [pkt_num: %d]\n",
+  pkt_num);
+   ret = TEST_FAILED;
+   }
+   }
+
+   if (flags->sa_expiry_pkts_soft && pkt_num == IPSEC_TEST_PACKETS_MAX) {
+   if (!(op->aux_flags &
+ RTE_CRYPTO_OP_AUX_FLAGS_IPSEC_SOFT_EXPIRY)) {
+   printf("SA soft expiry (pkts) test failed\n");
ret = TEST_FAILED;
}
}
diff --git a/app/test/test_cryptodev_security_ipsec.h 
b/app/test/test_cryptodev_security_ipsec.h
index e1645f4..eed3476 100644
--- a/app/test/test_cryptodev_security_ipsec.h
+++ b/app/test/test_cryptodev_security_ipsec.h
@@ -49,6 +49,7 @@ struct ipsec_test_data {
 
 struct ipsec_test_flags {
bool display_alg;
+   bool sa_expiry_pkts_soft;
bool icv_corrupt;
bool iv_gen;
bool udp_encap;
@@ -114,6 +115,7 @@ int test_ipsec_post_process(struct rte_mbuf *m,
 
 int test_ipsec_status_check(struct rte_crypto_op *op,
const struct ipsec_test_flags *flags,
-   enum rte_security_ipsec_sa_direction dir);
+   enum rte_security_ipsec_sa_direction dir,
+   int pkt_num);
 
 #endif
-- 
2.7.4



[dpdk-dev] [PATCH v4 5/6] test/crypto: add packets hard expiry cases

2021-09-28 Thread Anoob Joseph
Add tests to validate packets hard expiry handling.

Signed-off-by: Anoob Joseph 

---
 app/test/test_cryptodev.c| 19 ++-
 app/test/test_cryptodev_security_ipsec.c | 22 +++---
 app/test/test_cryptodev_security_ipsec.h |  1 +
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 1befbeb..34b55a9 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -9116,7 +9116,8 @@ test_ipsec_proto_all(const struct ipsec_test_flags *flags)
int ret;
 
if (flags->iv_gen ||
-   flags->sa_expiry_pkts_soft)
+   flags->sa_expiry_pkts_soft ||
+   flags->sa_expiry_pkts_hard)
nb_pkts = IPSEC_TEST_PACKETS_MAX;
 
for (i = 0; i < RTE_DIM(aead_list); i++) {
@@ -9193,6 +9194,18 @@ test_ipsec_proto_sa_exp_pkts_soft(const void *data 
__rte_unused)
 }
 
 static int
+test_ipsec_proto_sa_exp_pkts_hard(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.sa_expiry_pkts_hard = true;
+
+   return test_ipsec_proto_all(&flags);
+}
+
+static int
 test_ipsec_proto_err_icv_corrupt(const void *data __rte_unused)
 {
struct ipsec_test_flags flags;
@@ -14153,6 +14166,10 @@ static struct unit_test_suite ipsec_proto_testsuite  = 
{
ut_setup_security, ut_teardown,
test_ipsec_proto_sa_exp_pkts_soft),
TEST_CASE_NAMED_ST(
+   "SA expiry packets hard",
+   ut_setup_security, ut_teardown,
+   test_ipsec_proto_sa_exp_pkts_hard),
+   TEST_CASE_NAMED_ST(
"Negative test: ICV corruption",
ut_setup_security, ut_teardown,
test_ipsec_proto_err_icv_corrupt),
diff --git a/app/test/test_cryptodev_security_ipsec.c 
b/app/test/test_cryptodev_security_ipsec.c
index 56a44b5..046536c 100644
--- a/app/test/test_cryptodev_security_ipsec.c
+++ b/app/test/test_cryptodev_security_ipsec.c
@@ -200,6 +200,10 @@ test_ipsec_td_update(struct ipsec_test_data td_inb[],
td_inb[i].input_text.data[icv_pos] += 1;
}
 
+   if (flags->sa_expiry_pkts_hard)
+   td_inb[i].ipsec_xform.life.packets_hard_limit =
+   IPSEC_TEST_PACKETS_MAX - 1;
+
if (flags->udp_encap)
td_inb[i].ipsec_xform.options.udp_encap = 1;
 
@@ -285,9 +289,10 @@ test_ipsec_td_verify(struct rte_mbuf *m, const struct 
ipsec_test_data *td,
uint8_t *output_text = rte_pktmbuf_mtod(m, uint8_t *);
uint32_t skip, len = rte_pktmbuf_pkt_len(m);
 
-   /* For negative tests, no need to do verification */
-   if (flags->icv_corrupt &&
-   td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS)
+   /* For tests with status as error for test success, skip verification */
+   if (td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS &&
+   (flags->icv_corrupt ||
+flags->sa_expiry_pkts_hard))
return TEST_SUCCESS;
 
if (td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS &&
@@ -404,6 +409,17 @@ test_ipsec_status_check(struct rte_crypto_op *op,
 {
int ret = TEST_SUCCESS;
 
+   if (dir == RTE_SECURITY_IPSEC_SA_DIR_INGRESS &&
+   flags->sa_expiry_pkts_hard &&
+   pkt_num == IPSEC_TEST_PACKETS_MAX) {
+   if (op->status != RTE_CRYPTO_OP_STATUS_ERROR) {
+   printf("SA hard expiry (pkts) test failed\n");
+   return TEST_FAILED;
+   } else {
+   return TEST_SUCCESS;
+   }
+   }
+
if (dir == RTE_SECURITY_IPSEC_SA_DIR_INGRESS && flags->icv_corrupt) {
if (op->status != RTE_CRYPTO_OP_STATUS_ERROR) {
printf("ICV corruption test case failed\n");
diff --git a/app/test/test_cryptodev_security_ipsec.h 
b/app/test/test_cryptodev_security_ipsec.h
index eed3476..18f3c64 100644
--- a/app/test/test_cryptodev_security_ipsec.h
+++ b/app/test/test_cryptodev_security_ipsec.h
@@ -50,6 +50,7 @@ struct ipsec_test_data {
 struct ipsec_test_flags {
bool display_alg;
bool sa_expiry_pkts_soft;
+   bool sa_expiry_pkts_hard;
bool icv_corrupt;
bool iv_gen;
bool udp_encap;
-- 
2.7.4



[dpdk-dev] [PATCH v4 6/6] examples/ipsec-secgw: clear soft expiry configuration

2021-09-28 Thread Anoob Joseph
Soft expiry is not a mandatory IPsec feature. It is verified separately
with IPsec unit tests. So configuration of the same is not required.
Also, soft expiry tracking can cause perf degradation with some PMDs.
Since a separate UT is available and the same setting in ipsec-secgw is
not verifying the functionality, remove the same by clearing life
configuration.

Signed-off-by: Anoob Joseph 

---
 examples/ipsec-secgw/ipsec.c | 1 -
 examples/ipsec-secgw/ipsec.h | 2 --
 2 files changed, 3 deletions(-)

diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c
index 4868294..6817139 100644
--- a/examples/ipsec-secgw/ipsec.c
+++ b/examples/ipsec-secgw/ipsec.c
@@ -49,7 +49,6 @@ set_ipsec_conf(struct ipsec_sa *sa, struct 
rte_security_ipsec_xform *ipsec)
}
/* TODO support for Transport */
}
-   ipsec->life.packets_soft_limit = IPSEC_OFFLOAD_PKTS_SOFTLIMIT;
ipsec->replay_win_sz = app_sa_prm.window_size;
ipsec->options.esn = app_sa_prm.enable_esn;
ipsec->options.udp_encap = sa->udp_encap;
diff --git a/examples/ipsec-secgw/ipsec.h b/examples/ipsec-secgw/ipsec.h
index 90c81c1..8405c48 100644
--- a/examples/ipsec-secgw/ipsec.h
+++ b/examples/ipsec-secgw/ipsec.h
@@ -23,8 +23,6 @@
 
 #define MAX_DIGEST_SIZE 32 /* Bytes -- 256 bits */
 
-#define IPSEC_OFFLOAD_PKTS_SOFTLIMIT 0xff00
-
 #define IV_OFFSET  (sizeof(struct rte_crypto_op) + \
sizeof(struct rte_crypto_sym_op))
 
-- 
2.7.4



[dpdk-dev] [PATCH] app/testpmd: support unequal number of RXQ and TXQ

2021-09-28 Thread nipun . gupta
From: Jun Yang 

The existing forwarding mode configures the total number of
queues as the minimum of rxq and txq, so eventually the number
of txq are same as rxq.
However in some scenarios, specially for flow control the
number of rxq and txq can be different.
This patch makes the txq a function of the rxq for all such
scenarios instead of keeping a 1:1 relationship between the two.

Signed-off-by: Jun Yang 
---
 app/test-pmd/config.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index f5765b34f7..7e17f233ba 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -3000,8 +3000,6 @@ rss_fwd_config_setup(void)
int end;
 
nb_q = nb_rxq;
-   if (nb_q > nb_txq)
-   nb_q = nb_txq;
cur_fwd_config.nb_fwd_lcores = (lcoreid_t) nb_fwd_lcores;
cur_fwd_config.nb_fwd_ports = nb_fwd_ports;
cur_fwd_config.nb_fwd_streams =
@@ -3038,7 +3036,7 @@ rss_fwd_config_setup(void)
fs->rx_port = fwd_ports_ids[rxp];
fs->rx_queue = rxq;
fs->tx_port = fwd_ports_ids[txp];
-   fs->tx_queue = rxq;
+   fs->tx_queue = (rxq % nb_txq);
fs->peer_addr = fs->tx_port;
fs->retry_enabled = retry_enabled;
rxp++;
@@ -3253,7 +3251,7 @@ fwd_config_setup(void)
return;
}
 
-   if ((nb_rxq > 1) && (nb_txq > 1)){
+   if ((nb_rxq > 1) && (nb_txq > 1)) {
if (dcb_config) {
for (i = 0; i < nb_fwd_ports; i++) {
pt_id = fwd_ports_ids[i];
-- 
2.17.1



[dpdk-dev] [PATCH] app/testpmd: update raw flow to take hex input

2021-09-28 Thread nipun . gupta
From: Nipun Gupta 

This patch enables method to provide key and mask for raw rules
to be provided as hexadecimal values. There is new parameter
pattern_mask added to support this.

Signed-off-by: Nipun Gupta 
---
 app/test-pmd/cmdline_flow.c | 15 +++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 13 +
 2 files changed, 28 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 6cd99bf37f..a95b147d92 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -158,6 +158,7 @@ enum index {
ITEM_RAW_OFFSET,
ITEM_RAW_LIMIT,
ITEM_RAW_PATTERN,
+   ITEM_RAW_PATTERN_HEX,
ITEM_ETH,
ITEM_ETH_DST,
ITEM_ETH_SRC,
@@ -1046,6 +1047,7 @@ static const enum index item_raw[] = {
ITEM_RAW_OFFSET,
ITEM_RAW_LIMIT,
ITEM_RAW_PATTERN,
+   ITEM_RAW_PATTERN_HEX,
ITEM_NEXT,
ZERO,
 };
@@ -2487,6 +2489,19 @@ static const struct token token_list[] = {
 ARGS_ENTRY_ARB(sizeof(struct rte_flow_item_raw),
ITEM_RAW_PATTERN_SIZE)),
},
+   [ITEM_RAW_PATTERN_HEX] = {
+   .name = "pattern_hex",
+   .help = "hex string to look for",
+   .next = NEXT(item_raw,
+NEXT_ENTRY(HEX),
+NEXT_ENTRY(ITEM_PARAM_IS,
+   ITEM_PARAM_SPEC,
+   ITEM_PARAM_MASK)),
+   .args = ARGS(ARGS_ENTRY(struct rte_flow_item_raw, pattern),
+ARGS_ENTRY(struct rte_flow_item_raw, length),
+ARGS_ENTRY_ARB(sizeof(struct rte_flow_item_raw),
+   ITEM_RAW_PATTERN_SIZE)),
+   },
[ITEM_ETH] = {
.name = "eth",
.help = "match Ethernet header",
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst 
b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index 4f8751be1c..3a69d37037 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -3637,6 +3637,7 @@ This section lists supported pattern items and their 
attributes, if any.
   - ``offset {integer}``: absolute or relative offset for pattern.
   - ``limit {unsigned}``: search area limit for start of pattern.
   - ``pattern {string}``: byte string to look for.
+  - ``pattern_hex {string}``: byte string (provided in hexadecimal) to look 
for.
 
 - ``eth``: match Ethernet header.
 
@@ -5036,6 +5037,18 @@ The meter policy action list: ``green -> green, yellow 
-> yellow, red -> red``.
testpmd> create port meter 0 1 13 1 yes 0x 0 0
testpmd> flow create 0 priority 0 ingress group 1 pattern eth / end actions 
meter mtr_id 1 / end
 
+Sample RAW rule
+~~~
+
+A RAW rule can be created as follows using the ``pattern_hex`` key and mask.
+
+::
+
+testpmd> flow create 0 group 0 priority 1 ingress pattern raw relative is 
0 search is 0 offset
+ is 0 limit is 0 pattern_hex spec 
0a0a0a0a
+ pattern_hex mask 
 / end actions
+ queue index 4 / end
+
 BPF Functions
 --
 
-- 
2.17.1



Re: [dpdk-dev] [PATCH] net/iavf: fix multi-process shared data

2021-09-28 Thread Zhang, Qi Z



> -Original Message-
> From: dev  On Behalf Of dapengx...@intel.com
> Sent: Tuesday, September 28, 2021 11:38 AM
> To: Richardson, Bruce ; Ananyev, Konstantin
> ; Wu, Jingjing ; Xing,
> Beilei 
> Cc: dev@dpdk.org; Yu, DapengX ; sta...@dpdk.org
> Subject: [dpdk-dev] [PATCH] net/iavf: fix multi-process shared data
> 
> From: Dapeng Yu 
> 
> When the iavf_adapter instance is not initialized completedly in the primary
> process, the secondary process accesses its "rte_eth_dev"
> member, it causes secondary process crash.
> 
> This patch replaces adapter->eth_dev with rte_eth_devices[port_id] in the data
> paths where rte_eth_dev instance is accessed.
> 
> Fixes: f978c1c9b3b5 ("net/iavf: add RSS hash parsing in AVX path")
> Fixes: 9c9aa0040344 ("net/iavf: add offload path for Rx AVX512 flex
> descriptor")
> Fixes: 63660ea3ee0b ("net/iavf: add RSS hash parsing in SSE path")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Dapeng Yu 

Acked-by: Qi Zhang 

Applied to dpdk-next-net-intel.

Thanks
Qi



[dpdk-dev] [PATCH v2 0/3] add option to configure tunnel header verification

2021-09-28 Thread Tejasree Kondoj
Add option to indicate whether outer header verification need to be done
as part of inbound IPsec processing.
CNXK PMD support and unit tests are also added for the same.

Depends on
https://patches.dpdk.org/project/dpdk/list/?series=18743

v2:
* Removed deprecation notice

Tejasree Kondoj (3):
  security: add option to configure tunnel header verification
  common/cnxk: add support for tunnel header verification
  test/crypto: add tunnel header verification tests

 app/test/test_cryptodev.c | 45 +-
 app/test/test_cryptodev_security_ipsec.c  | 25 +++-
 app/test/test_cryptodev_security_ipsec.h  |  1 +
 ...st_cryptodev_security_ipsec_test_vectors.h |  3 +
 doc/guides/rel_notes/deprecation.rst  |  2 +-
 doc/guides/rel_notes/release_21_11.rst|  5 ++
 drivers/common/cnxk/cnxk_security.c   | 60 +++
 drivers/common/cnxk/roc_ie_ot.h   |  6 +-
 .../crypto/cnxk/cnxk_cryptodev_capabilities.c |  4 ++
 lib/security/rte_security.h   | 17 ++
 10 files changed, 163 insertions(+), 5 deletions(-)

-- 
2.27.0



Re: [dpdk-dev] [PATCH v6] net/ice: support IEEE 1588 PTP

2021-09-28 Thread Zhang, Qi Z



> -Original Message-
> From: Su, Simei 
> Sent: Tuesday, September 28, 2021 2:28 PM
> To: Zhang, Qi Z 
> Cc: dev@dpdk.org; Wang, Haiyue ; Su, Simei
> 
> Subject: [PATCH v6] net/ice: support IEEE 1588 PTP
> 
> Add ice support for new ethdev APIs to enable/disable and read/write/adjust
> IEEE1588 PTP timestamps. Currently, only scalar path supports 1588 PTP,
> vector path doesn't.
> 
> The example command for running ptpclient is as below:
> ./build/examples/dpdk-ptpclient -c 1 -n 3 -- -T 0 -p 0x1
> 
> Signed-off-by: Simei Su 

Acked-by: Qi Zhang 

Applied to dpdk-next-net-intel.

Thanks
Qi



[dpdk-dev] [PATCH v2 1/3] security: add option to configure tunnel header verification

2021-09-28 Thread Tejasree Kondoj
Add option to indicate whether outer header verification
need to be done as part of inbound IPsec processing.

With inline IPsec processing, SA lookup would be happening
in the Rx path of rte_ethdev. When rte_flow is configured to
support more than one SA, SPI would be used to lookup SA.
In such cases, additional verification would be required to
ensure duplicate SPIs are not getting processed in the inline path.

For lookaside cases, the same option can be used by application
to offload tunnel verification to the PMD.

These verifications would help in averting possible DoS attacks.

Signed-off-by: Tejasree Kondoj 
Acked-by: Akhil Goyal 
---
 doc/guides/rel_notes/deprecation.rst   |  2 +-
 doc/guides/rel_notes/release_21_11.rst |  5 +
 lib/security/rte_security.h| 17 +
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 69fbde0c70..80ae9a6372 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -238,7 +238,7 @@ Deprecation Notices
 
 * security: The IPsec SA config options ``struct 
rte_security_ipsec_sa_options``
   will be updated with new fields to support new features like IPsec inner
-  checksum, tunnel header verification, TSO in case of protocol offload.
+  checksum, TSO in case of protocol offload.
 
 * ipsec: The structure ``rte_ipsec_sa_prm`` will be extended with a new field
   ``hdr_l3_len`` to configure tunnel L3 header length.
diff --git a/doc/guides/rel_notes/release_21_11.rst 
b/doc/guides/rel_notes/release_21_11.rst
index 0b7ffa5e50..623b52d9c9 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -181,6 +181,11 @@ ABI Changes
 soft and hard SA expiry limits. Limits can be either in units of packets or
 bytes.
 
+* security: add IPsec SA option to configure tunnel header verification
+
+  * Added SA option to indicate whether outer header verification need to be
+done as part of inbound IPsec processing.
+
 
 Known Issues
 
diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
index 88147e1f57..a10c9b5f00 100644
--- a/lib/security/rte_security.h
+++ b/lib/security/rte_security.h
@@ -55,6 +55,14 @@ enum rte_security_ipsec_tunnel_type {
/**< Outer header is IPv6 */
 };
 
+/**
+ * IPSEC tunnel header verification mode
+ *
+ * Controls how outer IP header is verified in inbound.
+ */
+#define RTE_SECURITY_IPSEC_TUNNEL_VERIFY_DST_ADDR 0x1
+#define RTE_SECURITY_IPSEC_TUNNEL_VERIFY_SRC_DST_ADDR 0x2
+
 /**
  * Security context for crypto/eth devices
  *
@@ -206,6 +214,15 @@ struct rte_security_ipsec_sa_options {
 * by the PMD.
 */
uint32_t iv_gen_disable : 1;
+
+   /** Verify tunnel header in inbound
+* * ``RTE_SECURITY_IPSEC_TUNNEL_VERIFY_DST_ADDR``: Verify destination
+*   IP address.
+*
+* * ``RTE_SECURITY_IPSEC_TUNNEL_VERIFY_SRC_DST_ADDR``: Verify both
+*   source and destination IP addresses.
+*/
+   uint32_t tunnel_hdr_verify : 2;
 };
 
 /** IPSec security association direction */
-- 
2.27.0



[dpdk-dev] [PATCH v2 2/3] common/cnxk: add support for tunnel header verification

2021-09-28 Thread Tejasree Kondoj
Adding support to verify tunnel header in IPsec inbound.

Signed-off-by: Tejasree Kondoj 
---
 drivers/common/cnxk/cnxk_security.c   | 60 +++
 drivers/common/cnxk/roc_ie_ot.h   |  6 +-
 .../crypto/cnxk/cnxk_cryptodev_capabilities.c |  4 ++
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/common/cnxk/cnxk_security.c 
b/drivers/common/cnxk/cnxk_security.c
index 215d9fd4d1..cc5daf333c 100644
--- a/drivers/common/cnxk/cnxk_security.c
+++ b/drivers/common/cnxk/cnxk_security.c
@@ -199,6 +199,62 @@ ot_ipsec_inb_ctx_size(struct roc_ot_ipsec_inb_sa *sa)
return size;
 }
 
+static int
+ot_ipsec_inb_tunnel_hdr_fill(struct roc_ot_ipsec_inb_sa *sa,
+struct rte_security_ipsec_xform *ipsec_xfrm)
+{
+   struct rte_security_ipsec_tunnel_param *tunnel;
+
+   if (ipsec_xfrm->mode != RTE_SECURITY_IPSEC_SA_MODE_TUNNEL)
+   return 0;
+
+   if (ipsec_xfrm->options.tunnel_hdr_verify == 0)
+   return 0;
+
+   tunnel = &ipsec_xfrm->tunnel;
+
+   switch (tunnel->type) {
+   case RTE_SECURITY_IPSEC_TUNNEL_IPV4:
+   sa->w2.s.outer_ip_ver = ROC_IE_SA_IP_VERSION_4;
+   memcpy(&sa->outer_hdr.ipv4.src_addr, &tunnel->ipv4.src_ip,
+  sizeof(struct in_addr));
+   memcpy(&sa->outer_hdr.ipv4.dst_addr, &tunnel->ipv4.dst_ip,
+  sizeof(struct in_addr));
+
+   /* IP Source and Dest are in LE/CPU endian */
+   sa->outer_hdr.ipv4.src_addr =
+   rte_be_to_cpu_32(sa->outer_hdr.ipv4.src_addr);
+   sa->outer_hdr.ipv4.dst_addr =
+   rte_be_to_cpu_32(sa->outer_hdr.ipv4.dst_addr);
+
+   break;
+   case RTE_SECURITY_IPSEC_TUNNEL_IPV6:
+   sa->w2.s.outer_ip_ver = ROC_IE_SA_IP_VERSION_6;
+   memcpy(&sa->outer_hdr.ipv6.src_addr, &tunnel->ipv6.src_addr,
+  sizeof(struct in6_addr));
+   memcpy(&sa->outer_hdr.ipv6.dst_addr, &tunnel->ipv6.dst_addr,
+  sizeof(struct in6_addr));
+
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   switch (ipsec_xfrm->options.tunnel_hdr_verify) {
+   case RTE_SECURITY_IPSEC_TUNNEL_VERIFY_DST_ADDR:
+   sa->w2.s.ip_hdr_verify = ROC_IE_OT_SA_IP_HDR_VERIFY_DST_ADDR;
+   break;
+   case RTE_SECURITY_IPSEC_TUNNEL_VERIFY_SRC_DST_ADDR:
+   sa->w2.s.ip_hdr_verify =
+   ROC_IE_OT_SA_IP_HDR_VERIFY_SRC_DST_ADDR;
+   break;
+   default:
+   return -ENOTSUP;
+   }
+
+   return 0;
+}
+
 int
 cnxk_ot_ipsec_inb_sa_fill(struct roc_ot_ipsec_inb_sa *sa,
  struct rte_security_ipsec_xform *ipsec_xfrm,
@@ -229,6 +285,10 @@ cnxk_ot_ipsec_inb_sa_fill(struct roc_ot_ipsec_inb_sa *sa,
sa->w0.s.ar_win = rte_log2_u32(replay_win_sz) - 5;
}
 
+   rc = ot_ipsec_inb_tunnel_hdr_fill(sa, ipsec_xfrm);
+   if (rc)
+   return rc;
+
/* Default options for pkt_out and pkt_fmt are with
 * second pass meta and no defrag.
 */
diff --git a/drivers/common/cnxk/roc_ie_ot.h b/drivers/common/cnxk/roc_ie_ot.h
index 1ff468841d..12c75afac2 100644
--- a/drivers/common/cnxk/roc_ie_ot.h
+++ b/drivers/common/cnxk/roc_ie_ot.h
@@ -180,7 +180,11 @@ union roc_ot_ipsec_sa_word2 {
uint64_t auth_type : 4;
 
uint64_t encap_type : 2;
-   uint64_t rsvd1 : 6;
+   uint64_t et_ovrwr_ddr_en : 1;
+   uint64_t esn_en : 1;
+   uint64_t tport_l4_incr_csum : 1;
+   uint64_t ip_hdr_verify : 2;
+   uint64_t rsvd5 : 1;
 
uint64_t rsvd2 : 7;
uint64_t async_mode : 1;
diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c 
b/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c
index 4b97639e56..8a0cf289fd 100644
--- a/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c
+++ b/drivers/crypto/cnxk/cnxk_cryptodev_capabilities.c
@@ -920,6 +920,10 @@ cn10k_sec_caps_update(struct rte_security_capability 
*sec_cap)
 #ifdef LA_IPSEC_DEBUG
sec_cap->ipsec.options.iv_gen_disable = 1;
 #endif
+   } else {
+   if (sec_cap->ipsec.mode == RTE_SECURITY_IPSEC_SA_MODE_TUNNEL)
+   sec_cap->ipsec.options.tunnel_hdr_verify =
+   RTE_SECURITY_IPSEC_TUNNEL_VERIFY_SRC_DST_ADDR;
}
 }
 
-- 
2.27.0



[dpdk-dev] [PATCH v2 3/3] test/crypto: add tunnel header verification tests

2021-09-28 Thread Tejasree Kondoj
Add test cases to verify tunnel header in IPsec inbound.

Signed-off-by: Tejasree Kondoj 
---
 app/test/test_cryptodev.c | 45 ++-
 app/test/test_cryptodev_security_ipsec.c  | 25 ++-
 app/test/test_cryptodev_security_ipsec.h  |  1 +
 ...st_cryptodev_security_ipsec_test_vectors.h |  3 ++
 4 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 34b55a952e..665d19c0a4 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -8924,6 +8924,7 @@ test_ipsec_proto_process(const struct ipsec_test_data 
td[],
int salt_len, i, ret = TEST_SUCCESS;
struct rte_security_ctx *ctx;
uint8_t *input_text;
+   uint32_t verify;
 
ut_params->type = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL;
gbl_action_type = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL;
@@ -8933,11 +8934,19 @@ test_ipsec_proto_process(const struct ipsec_test_data 
td[],
/* Copy IPsec xform */
memcpy(&ipsec_xform, &td[0].ipsec_xform, sizeof(ipsec_xform));
 
+   dir = ipsec_xform.direction;
+   verify = flags->tunnel_hdr_verify;
+
+   if ((dir == RTE_SECURITY_IPSEC_SA_DIR_INGRESS) && verify) {
+   if (verify == RTE_SECURITY_IPSEC_TUNNEL_VERIFY_SRC_DST_ADDR)
+   src += 1;
+   else if (verify == RTE_SECURITY_IPSEC_TUNNEL_VERIFY_DST_ADDR)
+   dst += 1;
+   }
+
memcpy(&ipsec_xform.tunnel.ipv4.src_ip, &src, sizeof(src));
memcpy(&ipsec_xform.tunnel.ipv4.dst_ip, &dst, sizeof(dst));
 
-   dir = ipsec_xform.direction;
-
ctx = rte_cryptodev_get_sec_ctx(dev_id);
 
sec_cap_idx.action = ut_params->type;
@@ -9229,6 +9238,30 @@ test_ipsec_proto_udp_encap(const void *data __rte_unused)
return test_ipsec_proto_all(&flags);
 }
 
+static int
+test_ipsec_proto_tunnel_src_dst_addr_verify(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.tunnel_hdr_verify = RTE_SECURITY_IPSEC_TUNNEL_VERIFY_SRC_DST_ADDR;
+
+   return test_ipsec_proto_all(&flags);
+}
+
+static int
+test_ipsec_proto_tunnel_dst_addr_verify(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.tunnel_hdr_verify = RTE_SECURITY_IPSEC_TUNNEL_VERIFY_DST_ADDR;
+
+   return test_ipsec_proto_all(&flags);
+}
+
 static int
 test_PDCP_PROTO_all(void)
 {
@@ -14173,6 +14206,14 @@ static struct unit_test_suite ipsec_proto_testsuite  = 
{
"Negative test: ICV corruption",
ut_setup_security, ut_teardown,
test_ipsec_proto_err_icv_corrupt),
+   TEST_CASE_NAMED_ST(
+   "Tunnel dst addr verification",
+   ut_setup_security, ut_teardown,
+   test_ipsec_proto_tunnel_dst_addr_verify),
+   TEST_CASE_NAMED_ST(
+   "Tunnel src and dst addr verification",
+   ut_setup_security, ut_teardown,
+   test_ipsec_proto_tunnel_src_dst_addr_verify),
TEST_CASES_END() /**< NULL terminate unit test array */
}
 };
diff --git a/app/test/test_cryptodev_security_ipsec.c 
b/app/test/test_cryptodev_security_ipsec.c
index 046536cc9c..f040630655 100644
--- a/app/test/test_cryptodev_security_ipsec.c
+++ b/app/test/test_cryptodev_security_ipsec.c
@@ -86,6 +86,15 @@ test_ipsec_sec_caps_verify(struct rte_security_ipsec_xform 
*ipsec_xform,
return -ENOTSUP;
}
 
+   if ((ipsec_xform->direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS) &&
+   (ipsec_xform->options.tunnel_hdr_verify >
+   sec_cap->ipsec.options.tunnel_hdr_verify)) {
+   if (!silent)
+   RTE_LOG(INFO, USER1,
+   "Tunnel header verify is not supported\n");
+   return -ENOTSUP;
+   }
+
return 0;
 }
 
@@ -207,6 +216,9 @@ test_ipsec_td_update(struct ipsec_test_data td_inb[],
if (flags->udp_encap)
td_inb[i].ipsec_xform.options.udp_encap = 1;
 
+   td_inb[i].ipsec_xform.options.tunnel_hdr_verify =
+   flags->tunnel_hdr_verify;
+
/* Clear outbound specific flags */
td_inb[i].ipsec_xform.options.iv_gen_disable = 0;
}
@@ -292,7 +304,8 @@ test_ipsec_td_verify(struct rte_mbuf *m, const struct 
ipsec_test_data *td,
/* For tests with status as error for test success, skip verification */
if (td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS &&
(flags->icv_corrupt ||
-flags->sa_expiry_pkts_hard))
+flags->sa_expiry_pkts_hard ||
+flags->tunnel_hdr_verify))
return TEST_SUCCESS;
 
if

Re: [dpdk-dev] [PATCH v2] net/i40e: fix Rx packet statistics

2021-09-28 Thread Zhang, Qi Z



> -Original Message-
> From: dev  On Behalf Of Alvin Zhang
> Sent: Tuesday, September 28, 2021 11:23 AM
> To: Xing, Beilei ; Guo, Junfeng
> ; ktray...@redhat.com
> Cc: dev@dpdk.org; Zhang, AlvinX ; sta...@dpdk.org
> Subject: [dpdk-dev] [PATCH v2] net/i40e: fix Rx packet statistics
> 
> Some packets are discarded by the NIC because they are larger than the MTU,
> these packets should be counted as "RX error" instead of "RX packet", for
> example:
> 
>   pkt1 = Ether()/IP()/Raw('x' * 1400)
>   pkt2 = Ether()/IP()/Raw('x' * 1500)
> 
>    Forward statistics for port 0 -
>   RX-packets: 2 RX-dropped: 0 RX-total: 2
>   TX-packets: 1 TX-dropped: 0 TX-total: 1
>   
> 
>   Here the packet pkt2 has been discarded, but still was counted
>   by "RX-packets"
> 
> The register 'GL_RXERR1' can count above discarded packets.
> This patch adds reading and calculation of the 'GL_RXERR1' counter when
> reporting DPDK statistics.
> 
> Fixes: f4a91c38b4ad ("i40e: add extended stats")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Alvin Zhang 
> ---
>  drivers/net/i40e/i40e_ethdev.c | 16 +---
> drivers/net/i40e/i40e_ethdev.h | 10 ++
>  2 files changed, 23 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 7a2a828..7a207b2 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -532,7 +532,7 @@ static int i40e_sw_tunnel_filter_insert(struct i40e_pf
> *pf,
>  /* store statistics names and its offset in stats structure */  struct
> rte_i40e_xstats_name_off {
>   char name[RTE_ETH_XSTATS_NAME_SIZE];
> - unsigned offset;
> + int offset;
>  };
> 
>  static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = { @@
> -542,6 +542,8 @@ struct rte_i40e_xstats_name_off {
>   {"rx_dropped_packets", offsetof(struct i40e_eth_stats, rx_discards)},
>   {"rx_unknown_protocol_packets", offsetof(struct i40e_eth_stats,
>   rx_unknown_protocol)},
> + {"rx_size_error_packets", offsetof(struct i40e_pf, rx_err1) -
> +   offsetof(struct i40e_pf, stats)},
>   {"tx_unicast_packets", offsetof(struct i40e_eth_stats, tx_unicast)},
>   {"tx_multicast_packets", offsetof(struct i40e_eth_stats, tx_multicast)},
>   {"tx_broadcast_packets", offsetof(struct i40e_eth_stats, tx_broadcast)},
> @@ -3238,6 +3240,10 @@ void i40e_flex_payload_reg_set_default(struct
> i40e_hw *hw)
>   pf->offset_loaded,
>   &os->eth.rx_unknown_protocol,
>   &ns->eth.rx_unknown_protocol);
> + i40e_stat_update_48(hw, I40E_GL_RXERR1_H(hw->pf_id +
> I40E_MAX_VF),
> + I40E_GL_RXERR1_L(hw->pf_id + I40E_MAX_VF),
> + pf->offset_loaded, &pf->rx_err1_offset,
> + &pf->rx_err1);
>   i40e_stat_update_48_in_64(hw, I40E_GLPRT_GOTCH(hw->port),
> I40E_GLPRT_GOTCL(hw->port),
> pf->offset_loaded, &os->eth.tx_bytes, @@ 
> -3437,7
> +3443,8 @@ void i40e_flex_payload_reg_set_default(struct i40e_hw *hw)
>   stats->ipackets = pf->main_vsi->eth_stats.rx_unicast +
>   pf->main_vsi->eth_stats.rx_multicast +
>   pf->main_vsi->eth_stats.rx_broadcast -
> - pf->main_vsi->eth_stats.rx_discards;
> + pf->main_vsi->eth_stats.rx_discards -
> + pf->rx_err1;
>   stats->opackets = ns->eth.tx_unicast +
>   ns->eth.tx_multicast +
>   ns->eth.tx_broadcast;
> @@ -3451,7 +3458,8 @@ void i40e_flex_payload_reg_set_default(struct
> i40e_hw *hw)
>   pf->main_vsi->eth_stats.rx_discards;
>   stats->ierrors  = ns->crc_errors +
>   ns->rx_length_errors + ns->rx_undersize +
> - ns->rx_oversize + ns->rx_fragments + ns->rx_jabber;
> + ns->rx_oversize + ns->rx_fragments + ns->rx_jabber +
> + pf->rx_err1;
> 
>   if (pf->vfs) {
>   for (i = 0; i < pf->vf_num; i++) {
> @@ -6232,6 +6240,8 @@ struct i40e_vsi *
>   memset(&pf->stats_offset, 0, sizeof(struct i40e_hw_port_stats));
>   memset(&pf->internal_stats, 0, sizeof(struct i40e_eth_stats));
>   memset(&pf->internal_stats_offset, 0, sizeof(struct i40e_eth_stats));
> + pf->rx_err1 = 0;
> + pf->rx_err1_offset = 0;
> 
>   ret = i40e_pf_get_switch_config(pf);
>   if (ret != I40E_SUCCESS) {
> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> index cd6deab..846c8d4 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -19,6 +19,13 @@
>  #include "base/i40e_type.h"
>  #include "base/virtchnl.h"
> 
> +#define I40E_GL_RXERR1_H(_i) (0

[dpdk-dev] [PATCH 00/11] net/sfc: support per-queue stats on EF100

2021-09-28 Thread Andrew Rybchenko
Implement per-queue Rx and Tx statistics for EF100 in software.
Packets and bytes stats are collected by the driver.

Ivan Ilchenko (11):
  net/sfc: rename array of SW stats descriptions
  net/sfc: rename accumulative SW stats to total
  net/sfc: rename SW stats structures
  net/sfc: fix cleanup order of SW stats
  net/sfc: fix missing const of SW stats descriptions
  net/sfc: optimize getting number of SW stats
  net/sfc: prepare having no some SW stats on an adapter
  net/sfc: add toggle to disable total stat
  net/sfc: add support for SW stats groups
  net/sfc: collect per queue stats in EF100 Rx datapath
  net/sfc: collect per queue stats in EF100 Tx datapath

 drivers/net/sfc/sfc.h  |  26 +-
 drivers/net/sfc/sfc_dp.h   |   2 +
 drivers/net/sfc/sfc_dp_rx.h|   1 +
 drivers/net/sfc/sfc_dp_tx.h|   1 +
 drivers/net/sfc/sfc_ef100_rx.c |  10 +-
 drivers/net/sfc/sfc_ef100_tx.c |   6 +-
 drivers/net/sfc/sfc_ethdev.c   | 136 ++--
 drivers/net/sfc/sfc_sw_stats.c | 569 +
 8 files changed, 581 insertions(+), 170 deletions(-)

-- 
2.30.2



[dpdk-dev] [PATCH 01/11] net/sfc: rename array of SW stats descriptions

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Rename global array of SW stats descriptions declared as
'struct sfc_sw_xstat_descr sfc_sw_xstats[]' to
'sfc_sw_stats_descr[]' to avoid ambiguity since there is
structure declared as 'struct sfc_sw_xstats'.

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc_sw_stats.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/sfc/sfc_sw_stats.c b/drivers/net/sfc/sfc_sw_stats.c
index 2b28ba29e6..2b8b1b56e9 100644
--- a/drivers/net/sfc/sfc_sw_stats.c
+++ b/drivers/net/sfc/sfc_sw_stats.c
@@ -49,7 +49,7 @@ sfc_get_sw_xstat_val_tx_dbells(struct sfc_adapter *sa, 
uint16_t qid)
return 0;
 }
 
-struct sfc_sw_xstat_descr sfc_sw_xstats[] = {
+struct sfc_sw_xstat_descr sfc_sw_stats_descr[] = {
{
.name = "dbells",
.type = SFC_SW_STATS_RX,
@@ -334,9 +334,9 @@ sfc_sw_xstats_get_nb_supported(struct sfc_adapter *sa)
 
SFC_ASSERT(sfc_adapter_is_locked(sa));
 
-   for (i = 0; i < RTE_DIM(sfc_sw_xstats); i++) {
+   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
nb_supported += sfc_sw_xstat_get_nb_supported(sa,
-&sfc_sw_xstats[i]);
+   &sfc_sw_stats_descr[i]);
}
 
return nb_supported;
@@ -357,8 +357,8 @@ sfc_sw_xstats_get_vals(struct sfc_adapter *sa,
 
sw_xstats_offset = *nb_supported;
 
-   for (i = 0; i < RTE_DIM(sfc_sw_xstats); i++) {
-   sfc_sw_xstat_get_values(sa, &sfc_sw_xstats[i], xstats,
+   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
+   sfc_sw_xstat_get_values(sa, &sfc_sw_stats_descr[i], xstats,
xstats_count, nb_written, nb_supported);
}
 
@@ -380,8 +380,8 @@ sfc_sw_xstats_get_names(struct sfc_adapter *sa,
 
sfc_adapter_lock(sa);
 
-   for (i = 0; i < RTE_DIM(sfc_sw_xstats); i++) {
-   ret = sfc_sw_stat_get_names(sa, &sfc_sw_xstats[i],
+   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
+   ret = sfc_sw_stat_get_names(sa, &sfc_sw_stats_descr[i],
xstats_names, xstats_count,
nb_written, nb_supported);
if (ret != 0) {
@@ -410,8 +410,8 @@ sfc_sw_xstats_get_vals_by_id(struct sfc_adapter *sa,
 
sw_xstats_offset = *nb_supported;
 
-   for (i = 0; i < RTE_DIM(sfc_sw_xstats); i++) {
-   sfc_sw_xstat_get_values_by_id(sa, &sfc_sw_xstats[i], ids,
+   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
+   sfc_sw_xstat_get_values_by_id(sa, &sfc_sw_stats_descr[i], ids,
  values, n, nb_supported);
}
 
@@ -435,9 +435,9 @@ sfc_sw_xstats_get_names_by_id(struct sfc_adapter *sa,
 
sfc_adapter_lock(sa);
 
-   for (i = 0; i < RTE_DIM(sfc_sw_xstats); i++) {
-   ret = sfc_sw_xstat_get_names_by_id(sa, &sfc_sw_xstats[i], ids,
-  xstats_names, size,
+   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
+   ret = sfc_sw_xstat_get_names_by_id(sa, &sfc_sw_stats_descr[i],
+  ids, xstats_names, size,
   nb_supported);
if (ret != 0) {
sfc_adapter_unlock(sa);
@@ -488,8 +488,8 @@ sfc_sw_xstats_reset(struct sfc_adapter *sa)
 
SFC_ASSERT(sfc_adapter_is_locked(sa));
 
-   for (i = 0; i < RTE_DIM(sfc_sw_xstats); i++) {
-   sw_xstat = &sfc_sw_xstats[i];
+   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
+   sw_xstat = &sfc_sw_stats_descr[i];
sfc_sw_xstat_reset(sa, sw_xstat, reset_vals);
reset_vals += sfc_sw_xstat_get_nb_supported(sa, sw_xstat);
}
@@ -502,9 +502,9 @@ sfc_sw_xstats_configure(struct sfc_adapter *sa)
size_t nb_supported = 0;
unsigned int i;
 
-   for (i = 0; i < RTE_DIM(sfc_sw_xstats); i++)
+   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++)
nb_supported += sfc_sw_xstat_get_nb_supported(sa,
-   &sfc_sw_xstats[i]);
+   &sfc_sw_stats_descr[i]);
 
*reset_vals = rte_realloc(*reset_vals,
  nb_supported * sizeof(**reset_vals), 0);
-- 
2.30.2



[dpdk-dev] [PATCH 02/11] net/sfc: rename accumulative SW stats to total

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Rename to 'total', which is the term used in the RTE documentation.

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc_sw_stats.c | 52 +-
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/net/sfc/sfc_sw_stats.c b/drivers/net/sfc/sfc_sw_stats.c
index 2b8b1b56e9..a9f1790d38 100644
--- a/drivers/net/sfc/sfc_sw_stats.c
+++ b/drivers/net/sfc/sfc_sw_stats.c
@@ -126,7 +126,7 @@ sfc_sw_stat_get_queue_count(struct sfc_adapter *sa,
 static unsigned int
 sfc_sw_xstat_per_queue_get_count(unsigned int nb_queues)
 {
-   /* Take into account the accumulative xstat of all queues */
+   /* Take into account the total xstat of all queues */
return nb_queues > 0 ? 1 + nb_queues : 0;
 }
 
@@ -160,7 +160,7 @@ sfc_sw_stat_get_names(struct sfc_adapter *sa,
*nb_supported += sfc_sw_xstat_per_queue_get_count(nb_queues);
 
/*
-* The order of each software xstat type is the accumulative xstat
+* The order of each software xstat type is the total xstat
 * followed by per-queue xstats.
 */
if (*nb_written < xstats_names_sz) {
@@ -206,7 +206,7 @@ sfc_sw_xstat_get_names_by_id(struct sfc_adapter *sa,
*nb_supported += sfc_sw_xstat_per_queue_get_count(nb_queues);
 
/*
-* The order of each software xstat type is the accumulative xstat
+* The order of each software xstat type is the total xstat
 * followed by per-queue xstats.
 */
for (i = 0; i < size; i++) {
@@ -232,8 +232,8 @@ sfc_sw_xstat_get_values(struct sfc_adapter *sa,
 {
unsigned int qid;
uint64_t value;
-   struct rte_eth_xstat *accum_xstat;
-   bool count_accum_value = false;
+   struct rte_eth_xstat *total_xstat;
+   bool count_total_value = false;
unsigned int nb_queues;
 
nb_queues = sfc_sw_stat_get_queue_count(sa, sw_xstat);
@@ -242,12 +242,12 @@ sfc_sw_xstat_get_values(struct sfc_adapter *sa,
*nb_supported += sfc_sw_xstat_per_queue_get_count(nb_queues);
 
/*
-* The order of each software xstat type is the accumulative xstat
+* The order of each software xstat type is the total xstat
 * followed by per-queue xstats.
 */
if (*nb_written < xstats_size) {
-   count_accum_value = true;
-   accum_xstat = &xstats[*nb_written];
+   count_total_value = true;
+   total_xstat = &xstats[*nb_written];
xstats[*nb_written].id = *nb_written;
xstats[*nb_written].value = 0;
(*nb_written)++;
@@ -262,8 +262,8 @@ sfc_sw_xstat_get_values(struct sfc_adapter *sa,
(*nb_written)++;
}
 
-   if (count_accum_value)
-   accum_xstat->value += value;
+   if (count_total_value)
+   total_xstat->value += value;
}
 }
 
@@ -278,9 +278,9 @@ sfc_sw_xstat_get_values_by_id(struct sfc_adapter *sa,
rte_spinlock_t *bmp_lock = &sa->sw_xstats.queues_bitmap_lock;
struct rte_bitmap *bmp = sa->sw_xstats.queues_bitmap;
unsigned int id_base = *nb_supported;
-   bool count_accum_value = false;
-   unsigned int accum_value_idx;
-   uint64_t accum_value = 0;
+   bool count_total_value = false;
+   unsigned int total_value_idx;
+   uint64_t total_value = 0;
unsigned int i, qid;
unsigned int nb_queues;
 
@@ -294,32 +294,32 @@ sfc_sw_xstat_get_values_by_id(struct sfc_adapter *sa,
*nb_supported += sfc_sw_xstat_per_queue_get_count(nb_queues);
 
/*
-* The order of each software xstat type is the accumulative xstat
+* The order of each software xstat type is the total xstat
 * followed by per-queue xstats.
 */
for (i = 0; i < ids_size; i++) {
if (id_base <= ids[i] && ids[i] <= (id_base + nb_queues)) {
if (ids[i] == id_base) { /* Accumulative value */
-   count_accum_value = true;
-   accum_value_idx = i;
+   count_total_value = true;
+   total_value_idx = i;
continue;
}
qid = ids[i] - id_base - 1;
values[i] = sw_xstat->get_val(sa, qid);
-   accum_value += values[i];
+   total_value += values[i];
 
rte_bitmap_set(bmp, qid);
}
}
 
-   if (count_accum_value) {
-   values[accum_value_idx] = 0;
+   if (count_total_value) {
+   values[total_value_idx] = 0;
for (qid = 0; qid < nb_queues; ++qid) {
if (rte_bitmap_get(bmp, qid) != 0)
continue;
-   value

[dpdk-dev] [PATCH 03/11] net/sfc: rename SW stats structures

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Delete 'x' in:
1. per-adapter 'struct sfc_sw_xstats' with corresponding
   field 'sw_xstats'.
2. 'struct sfc_sw_xstat_descr' and callback prototype
   'sfc_get_sw_xstat_val_t' with its implementations.

The 'x' stands for 'extended' in RTE, but from the sfc point
of view these are just SW stats.

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc.h  |   4 +-
 drivers/net/sfc/sfc_sw_stats.c | 114 -
 2 files changed, 59 insertions(+), 59 deletions(-)

diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h
index 331e06bac6..93d5202a24 100644
--- a/drivers/net/sfc/sfc.h
+++ b/drivers/net/sfc/sfc.h
@@ -217,7 +217,7 @@ struct sfc_counter_rxq {
struct rte_mempool  *mp;
 };
 
-struct sfc_sw_xstats {
+struct sfc_sw_stats {
uint64_t*reset_vals;
 
rte_spinlock_t  queues_bitmap_lock;
@@ -257,7 +257,7 @@ struct sfc_adapter {
struct sfc_sriovsriov;
struct sfc_intr intr;
struct sfc_port port;
-   struct sfc_sw_xstatssw_xstats;
+   struct sfc_sw_stats sw_stats;
struct sfc_filter   filter;
struct sfc_mae  mae;
 
diff --git a/drivers/net/sfc/sfc_sw_stats.c b/drivers/net/sfc/sfc_sw_stats.c
index a9f1790d38..108f301822 100644
--- a/drivers/net/sfc/sfc_sw_stats.c
+++ b/drivers/net/sfc/sfc_sw_stats.c
@@ -15,17 +15,17 @@ enum sfc_sw_stats_type {
SFC_SW_STATS_TX,
 };
 
-typedef uint64_t sfc_get_sw_xstat_val_t(struct sfc_adapter *sa, uint16_t qid);
+typedef uint64_t sfc_get_sw_stat_val_t(struct sfc_adapter *sa, uint16_t qid);
 
-struct sfc_sw_xstat_descr {
+struct sfc_sw_stat_descr {
const char *name;
enum sfc_sw_stats_type type;
-   sfc_get_sw_xstat_val_t *get_val;
+   sfc_get_sw_stat_val_t *get_val;
 };
 
-static sfc_get_sw_xstat_val_t sfc_get_sw_xstat_val_rx_dbells;
+static sfc_get_sw_stat_val_t sfc_get_sw_stat_val_rx_dbells;
 static uint64_t
-sfc_get_sw_xstat_val_rx_dbells(struct sfc_adapter *sa, uint16_t qid)
+sfc_get_sw_stat_val_rx_dbells(struct sfc_adapter *sa, uint16_t qid)
 {
struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
struct sfc_rxq_info *rxq_info;
@@ -36,9 +36,9 @@ sfc_get_sw_xstat_val_rx_dbells(struct sfc_adapter *sa, 
uint16_t qid)
return 0;
 }
 
-static sfc_get_sw_xstat_val_t sfc_get_sw_xstat_val_tx_dbells;
+static sfc_get_sw_stat_val_t sfc_get_sw_stat_val_tx_dbells;
 static uint64_t
-sfc_get_sw_xstat_val_tx_dbells(struct sfc_adapter *sa, uint16_t qid)
+sfc_get_sw_stat_val_tx_dbells(struct sfc_adapter *sa, uint16_t qid)
 {
struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
struct sfc_txq_info *txq_info;
@@ -49,28 +49,28 @@ sfc_get_sw_xstat_val_tx_dbells(struct sfc_adapter *sa, 
uint16_t qid)
return 0;
 }
 
-struct sfc_sw_xstat_descr sfc_sw_stats_descr[] = {
+struct sfc_sw_stat_descr sfc_sw_stats_descr[] = {
{
.name = "dbells",
.type = SFC_SW_STATS_RX,
-   .get_val  = sfc_get_sw_xstat_val_rx_dbells,
+   .get_val  = sfc_get_sw_stat_val_rx_dbells,
},
{
.name = "dbells",
.type = SFC_SW_STATS_TX,
-   .get_val  = sfc_get_sw_xstat_val_tx_dbells,
+   .get_val  = sfc_get_sw_stat_val_tx_dbells,
}
 };
 
 static int
 sfc_sw_stat_get_name(struct sfc_adapter *sa,
-const struct sfc_sw_xstat_descr *sw_xstat, char *name,
+const struct sfc_sw_stat_descr *sw_stat, char *name,
 size_t name_size, unsigned int id_off)
 {
const char *prefix;
int ret;
 
-   switch (sw_xstat->type) {
+   switch (sw_stat->type) {
case SFC_SW_STATS_RX:
prefix = "rx";
break;
@@ -79,25 +79,25 @@ sfc_sw_stat_get_name(struct sfc_adapter *sa,
break;
default:
sfc_err(sa, "%s: unknown software statistics type %d",
-   __func__, sw_xstat->type);
+   __func__, sw_stat->type);
return -EINVAL;
}
 
if (id_off == 0) {
ret = snprintf(name, name_size, "%s_%s", prefix,
-sw_xstat->name);
+sw_stat->name);
if (ret < 0 || ret >= (int)name_size) {
sfc_err(sa, "%s: failed to fill xstat name %s_%s, err 
%d",
-   __func__, prefix, sw_xstat->name, ret);
+   __func__, prefix, sw_stat->name, ret);
return ret > 0 ? -EINVAL : ret;
}
} else {
uint16_t qid = id_off - 1;
ret = snprintf(name, name_size,

[dpdk-dev] [PATCH 04/11] net/sfc: fix cleanup order of SW stats

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Fix the cleanup order of SW stats structures so that it is
the reverse of the initialization order.

Fixes: fdd7719eb3c ("net/sfc: add xstats for Rx/Tx doorbells")
Cc: sta...@dpdk.org

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc_sw_stats.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/sfc/sfc_sw_stats.c b/drivers/net/sfc/sfc_sw_stats.c
index 108f301822..c297bb8294 100644
--- a/drivers/net/sfc/sfc_sw_stats.c
+++ b/drivers/net/sfc/sfc_sw_stats.c
@@ -566,8 +566,7 @@ sfc_sw_xstats_init(struct sfc_adapter *sa)
 void
 sfc_sw_xstats_close(struct sfc_adapter *sa)
 {
+   sfc_sw_xstats_free_queues_bitmap(sa);
rte_free(sa->sw_stats.reset_vals);
sa->sw_stats.reset_vals = NULL;
-
-   sfc_sw_xstats_free_queues_bitmap(sa);
 }
-- 
2.30.2



[dpdk-dev] [PATCH 05/11] net/sfc: fix missing const of SW stats descriptions

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Description is a global variable shared by all adapters and must
not be modified.

Fixes: fdd7719eb3c ("net/sfc: add xstats for Rx/Tx doorbells")
Cc: sta...@dpdk.org

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc_sw_stats.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/sfc/sfc_sw_stats.c b/drivers/net/sfc/sfc_sw_stats.c
index c297bb8294..de99e1cfaf 100644
--- a/drivers/net/sfc/sfc_sw_stats.c
+++ b/drivers/net/sfc/sfc_sw_stats.c
@@ -49,7 +49,7 @@ sfc_get_sw_stat_val_tx_dbells(struct sfc_adapter *sa, 
uint16_t qid)
return 0;
 }
 
-struct sfc_sw_stat_descr sfc_sw_stats_descr[] = {
+const struct sfc_sw_stat_descr sfc_sw_stats_descr[] = {
{
.name = "dbells",
.type = SFC_SW_STATS_RX,
@@ -452,7 +452,8 @@ sfc_sw_xstats_get_names_by_id(struct sfc_adapter *sa,
 }
 
 static void
-sfc_sw_xstat_reset(struct sfc_adapter *sa, struct sfc_sw_stat_descr *sw_stat,
+sfc_sw_xstat_reset(struct sfc_adapter *sa,
+  const struct sfc_sw_stat_descr *sw_stat,
   uint64_t *reset_vals)
 {
unsigned int nb_queues;
@@ -483,7 +484,7 @@ void
 sfc_sw_xstats_reset(struct sfc_adapter *sa)
 {
uint64_t *reset_vals = sa->sw_stats.reset_vals;
-   struct sfc_sw_stat_descr *sw_stat;
+   const struct sfc_sw_stat_descr *sw_stat;
unsigned int i;
 
SFC_ASSERT(sfc_adapter_is_locked(sa));
-- 
2.30.2



[dpdk-dev] [PATCH 06/11] net/sfc: optimize getting number of SW stats

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Optimize getting the number of SW stats by caching the
value during device configure, since that is the only
place it may change.

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc.h  |  2 ++
 drivers/net/sfc/sfc_sw_stats.c | 14 --
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h
index 93d5202a24..b9ff8baed2 100644
--- a/drivers/net/sfc/sfc.h
+++ b/drivers/net/sfc/sfc.h
@@ -218,6 +218,8 @@ struct sfc_counter_rxq {
 };
 
 struct sfc_sw_stats {
+   /* Number extended statistics provided by SW stats */
+   unsigned intxstats_count;
uint64_t*reset_vals;
 
rte_spinlock_t  queues_bitmap_lock;
diff --git a/drivers/net/sfc/sfc_sw_stats.c b/drivers/net/sfc/sfc_sw_stats.c
index de99e1cfaf..0f93091500 100644
--- a/drivers/net/sfc/sfc_sw_stats.c
+++ b/drivers/net/sfc/sfc_sw_stats.c
@@ -329,17 +329,8 @@ sfc_sw_xstat_get_values_by_id(struct sfc_adapter *sa,
 unsigned int
 sfc_sw_xstats_get_nb_supported(struct sfc_adapter *sa)
 {
-   unsigned int nb_supported = 0;
-   unsigned int i;
-
SFC_ASSERT(sfc_adapter_is_locked(sa));
-
-   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
-   nb_supported += sfc_sw_xstat_get_nb_supported(sa,
-   &sfc_sw_stats_descr[i]);
-   }
-
-   return nb_supported;
+   return sa->sw_stats.xstats_count;
 }
 
 void
@@ -506,6 +497,7 @@ sfc_sw_xstats_configure(struct sfc_adapter *sa)
for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++)
nb_supported += sfc_sw_xstat_get_nb_supported(sa,
&sfc_sw_stats_descr[i]);
+   sa->sw_stats.xstats_count = nb_supported;
 
*reset_vals = rte_realloc(*reset_vals,
  nb_supported * sizeof(**reset_vals), 0);
@@ -559,6 +551,7 @@ sfc_sw_xstats_alloc_queues_bitmap(struct sfc_adapter *sa)
 int
 sfc_sw_xstats_init(struct sfc_adapter *sa)
 {
+   sa->sw_stats.xstats_count = 0;
sa->sw_stats.reset_vals = NULL;
 
return sfc_sw_xstats_alloc_queues_bitmap(sa);
@@ -570,4 +563,5 @@ sfc_sw_xstats_close(struct sfc_adapter *sa)
sfc_sw_xstats_free_queues_bitmap(sa);
rte_free(sa->sw_stats.reset_vals);
sa->sw_stats.reset_vals = NULL;
+   sa->sw_stats.xstats_count = 0;
 }
-- 
2.30.2



[dpdk-dev] [PATCH 07/11] net/sfc: prepare having no some SW stats on an adapter

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

A global description structure of SW stats is currently used.
The following patches introduce SW stats that may be unavailable
for some adapters, so add per-adapter descriptions to work
safely with multiple adapters.

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc.h  |  7 
 drivers/net/sfc/sfc_sw_stats.c | 65 +-
 2 files changed, 55 insertions(+), 17 deletions(-)

diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h
index b9ff8baed2..5a40a73c7f 100644
--- a/drivers/net/sfc/sfc.h
+++ b/drivers/net/sfc/sfc.h
@@ -217,9 +217,16 @@ struct sfc_counter_rxq {
struct rte_mempool  *mp;
 };
 
+struct sfc_sw_stat_data {
+   const struct sfc_sw_stat_descr *descr;
+};
+
 struct sfc_sw_stats {
/* Number extended statistics provided by SW stats */
unsigned intxstats_count;
+   /* Supported SW statistics */
+   struct sfc_sw_stat_data *supp;
+   unsigned intsupp_count;
uint64_t*reset_vals;
 
rte_spinlock_t  queues_bitmap_lock;
diff --git a/drivers/net/sfc/sfc_sw_stats.c b/drivers/net/sfc/sfc_sw_stats.c
index 0f93091500..a9ab29064a 100644
--- a/drivers/net/sfc/sfc_sw_stats.c
+++ b/drivers/net/sfc/sfc_sw_stats.c
@@ -341,6 +341,7 @@ sfc_sw_xstats_get_vals(struct sfc_adapter *sa,
   unsigned int *nb_supported)
 {
uint64_t *reset_vals = sa->sw_stats.reset_vals;
+   struct sfc_sw_stats *sw_stats = &sa->sw_stats;
unsigned int sw_xstats_offset;
unsigned int i;
 
@@ -348,8 +349,8 @@ sfc_sw_xstats_get_vals(struct sfc_adapter *sa,
 
sw_xstats_offset = *nb_supported;
 
-   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
-   sfc_sw_xstat_get_values(sa, &sfc_sw_stats_descr[i], xstats,
+   for (i = 0; i < sw_stats->xstats_count; i++) {
+   sfc_sw_xstat_get_values(sa, sw_stats->supp[i].descr, xstats,
xstats_count, nb_written, nb_supported);
}
 
@@ -366,13 +367,14 @@ sfc_sw_xstats_get_names(struct sfc_adapter *sa,
unsigned int *nb_written,
unsigned int *nb_supported)
 {
+   struct sfc_sw_stats *sw_stats = &sa->sw_stats;
unsigned int i;
int ret;
 
sfc_adapter_lock(sa);
 
-   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
-   ret = sfc_sw_stat_get_names(sa, &sfc_sw_stats_descr[i],
+   for (i = 0; i < sw_stats->supp_count; i++) {
+   ret = sfc_sw_stat_get_names(sa, sw_stats->supp[i].descr,
xstats_names, xstats_count,
nb_written, nb_supported);
if (ret != 0) {
@@ -394,6 +396,7 @@ sfc_sw_xstats_get_vals_by_id(struct sfc_adapter *sa,
 unsigned int *nb_supported)
 {
uint64_t *reset_vals = sa->sw_stats.reset_vals;
+   struct sfc_sw_stats *sw_stats = &sa->sw_stats;
unsigned int sw_xstats_offset;
unsigned int i;
 
@@ -401,8 +404,8 @@ sfc_sw_xstats_get_vals_by_id(struct sfc_adapter *sa,
 
sw_xstats_offset = *nb_supported;
 
-   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
-   sfc_sw_xstat_get_values_by_id(sa, &sfc_sw_stats_descr[i], ids,
+   for (i = 0; i < sw_stats->supp_count; i++) {
+   sfc_sw_xstat_get_values_by_id(sa, sw_stats->supp[i].descr, ids,
  values, n, nb_supported);
}
 
@@ -421,13 +424,14 @@ sfc_sw_xstats_get_names_by_id(struct sfc_adapter *sa,
  unsigned int size,
  unsigned int *nb_supported)
 {
+   struct sfc_sw_stats *sw_stats = &sa->sw_stats;
unsigned int i;
int ret;
 
sfc_adapter_lock(sa);
 
-   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
-   ret = sfc_sw_xstat_get_names_by_id(sa, &sfc_sw_stats_descr[i],
+   for (i = 0; i < sw_stats->supp_count; i++) {
+   ret = sfc_sw_xstat_get_names_by_id(sa, sw_stats->supp[i].descr,
   ids, xstats_names, size,
   nb_supported);
if (ret != 0) {
@@ -475,15 +479,15 @@ void
 sfc_sw_xstats_reset(struct sfc_adapter *sa)
 {
uint64_t *reset_vals = sa->sw_stats.reset_vals;
-   const struct sfc_sw_stat_descr *sw_stat;
+   struct sfc_sw_stats *sw_stats = &sa->sw_stats;
unsigned int i;
 
SFC_ASSERT(sfc_adapter_is_locked(sa));
 
-   for (i = 0; i < RTE_DIM(sfc_sw_stats_descr); i++) {
-   sw_stat = &sfc_sw_stats_descr[i];
-   sfc_sw_xstat_reset(sa, sw_stat, reset_vals);
-   reset_vals += sfc_sw_xstat_get_nb_supported(sa, sw_stat);
+   for (i 

[dpdk-dev] [PATCH 08/11] net/sfc: add toggle to disable total stat

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Add a toggle to disable the total SW stat. This is useful
for the per-queue 'packets' and 'bytes' stats so that they
do not conflict with the corresponding basic stats. These
stats will be added in the following patches.

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc_sw_stats.c | 54 +-
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/drivers/net/sfc/sfc_sw_stats.c b/drivers/net/sfc/sfc_sw_stats.c
index a9ab29064a..63fc334d2b 100644
--- a/drivers/net/sfc/sfc_sw_stats.c
+++ b/drivers/net/sfc/sfc_sw_stats.c
@@ -21,6 +21,7 @@ struct sfc_sw_stat_descr {
const char *name;
enum sfc_sw_stats_type type;
sfc_get_sw_stat_val_t *get_val;
+   bool provide_total;
 };
 
 static sfc_get_sw_stat_val_t sfc_get_sw_stat_val_rx_dbells;
@@ -54,11 +55,13 @@ const struct sfc_sw_stat_descr sfc_sw_stats_descr[] = {
.name = "dbells",
.type = SFC_SW_STATS_RX,
.get_val  = sfc_get_sw_stat_val_rx_dbells,
+   .provide_total = true,
},
{
.name = "dbells",
.type = SFC_SW_STATS_TX,
.get_val  = sfc_get_sw_stat_val_tx_dbells,
+   .provide_total = true,
}
 };
 
@@ -83,7 +86,7 @@ sfc_sw_stat_get_name(struct sfc_adapter *sa,
return -EINVAL;
}
 
-   if (id_off == 0) {
+   if (sw_stat->provide_total && id_off == 0) {
ret = snprintf(name, name_size, "%s_%s", prefix,
 sw_stat->name);
if (ret < 0 || ret >= (int)name_size) {
@@ -92,7 +95,7 @@ sfc_sw_stat_get_name(struct sfc_adapter *sa,
return ret > 0 ? -EINVAL : ret;
}
} else {
-   uint16_t qid = id_off - 1;
+   uint16_t qid = id_off - sw_stat->provide_total;
ret = snprintf(name, name_size, "%s_q%u_%s", prefix, qid,
sw_stat->name);
if (ret < 0 || ret >= (int)name_size) {
@@ -124,10 +127,11 @@ sfc_sw_stat_get_queue_count(struct sfc_adapter *sa,
 }
 
 static unsigned int
-sfc_sw_xstat_per_queue_get_count(unsigned int nb_queues)
+sfc_sw_xstat_per_queue_get_count(const struct sfc_sw_stat_descr *sw_stat,
+unsigned int nb_queues)
 {
/* Take into account the total xstat of all queues */
-   return nb_queues > 0 ? 1 + nb_queues : 0;
+   return nb_queues > 0 ? sw_stat->provide_total + nb_queues : 0;
 }
 
 static unsigned int
@@ -137,7 +141,7 @@ sfc_sw_xstat_get_nb_supported(struct sfc_adapter *sa,
unsigned int nb_queues;
 
nb_queues = sfc_sw_stat_get_queue_count(sa, sw_stat);
-   return sfc_sw_xstat_per_queue_get_count(nb_queues);
+   return sfc_sw_xstat_per_queue_get_count(sw_stat, nb_queues);
 }
 
 static int
@@ -157,13 +161,13 @@ sfc_sw_stat_get_names(struct sfc_adapter *sa,
nb_queues = sfc_sw_stat_get_queue_count(sa, sw_stat);
if (nb_queues == 0)
return 0;
-   *nb_supported += sfc_sw_xstat_per_queue_get_count(nb_queues);
+   *nb_supported += sfc_sw_xstat_per_queue_get_count(sw_stat, nb_queues);
 
/*
 * The order of each software xstat type is the total xstat
 * followed by per-queue xstats.
 */
-   if (*nb_written < xstats_names_sz) {
+   if (*nb_written < xstats_names_sz && sw_stat->provide_total) {
rc = sfc_sw_stat_get_name(sa, sw_stat,
  xstats_names[*nb_written].name,
  name_size, *nb_written - id_base);
@@ -196,6 +200,7 @@ sfc_sw_xstat_get_names_by_id(struct sfc_adapter *sa,
 {
const size_t name_size = sizeof(xstats_names[0].name);
unsigned int id_base = *nb_supported;
+   unsigned int id_end;
unsigned int nb_queues;
unsigned int i;
int rc;
@@ -203,14 +208,15 @@ sfc_sw_xstat_get_names_by_id(struct sfc_adapter *sa,
nb_queues = sfc_sw_stat_get_queue_count(sa, sw_stat);
if (nb_queues == 0)
return 0;
-   *nb_supported += sfc_sw_xstat_per_queue_get_count(nb_queues);
+   *nb_supported += sfc_sw_xstat_per_queue_get_count(sw_stat, nb_queues);
 
/*
 * The order of each software xstat type is the total xstat
 * followed by per-queue xstats.
 */
+   id_end = id_base + sw_stat->provide_total + nb_queues;
for (i = 0; i < size; i++) {
-   if (id_base <= ids[i] && ids[i] <= id_base + nb_queues) {
+   if (id_base <= ids[i] && ids[i] < id_end) {
rc = sfc_sw_stat_get_name(sa, sw_stat,
  xstats_names[i].name,
  name_size, ids[i] - id_base);
@@ -239,13 +245,13 @@ sfc_sw_xstat_get_values(st

[dpdk-dev] [PATCH 09/11] net/sfc: add support for SW stats groups

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Add support for grouping SW stats together. When stats are
grouped, the corresponding stats values for each queue
are obtained in a single read callback invocation. This is
useful for grouping the per-queue stats 'packets' and 'bytes'
to keep them consistent, i.e. the number of bytes corresponds
to the number of packets. These stats will be added in the
following patches.

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc.h  |   8 ++
 drivers/net/sfc/sfc_sw_stats.c | 153 -
 2 files changed, 138 insertions(+), 23 deletions(-)

diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h
index 5a40a73c7f..30679014e3 100644
--- a/drivers/net/sfc/sfc.h
+++ b/drivers/net/sfc/sfc.h
@@ -30,6 +30,7 @@
 #include "sfc_sriov.h"
 #include "sfc_mae.h"
 #include "sfc_dp.h"
+#include "sfc_sw_stats.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -219,6 +220,8 @@ struct sfc_counter_rxq {
 
 struct sfc_sw_stat_data {
const struct sfc_sw_stat_descr *descr;
+   /* Cache fragment */
+   uint64_t*cache;
 };
 
 struct sfc_sw_stats {
@@ -227,6 +230,11 @@ struct sfc_sw_stats {
/* Supported SW statistics */
struct sfc_sw_stat_data *supp;
unsigned intsupp_count;
+
+   /* Cache for all supported SW statistics */
+   uint64_t*cache;
+   unsigned intcache_count;
+
uint64_t*reset_vals;
 
rte_spinlock_t  queues_bitmap_lock;
diff --git a/drivers/net/sfc/sfc_sw_stats.c b/drivers/net/sfc/sfc_sw_stats.c
index 63fc334d2b..81bd531a17 100644
--- a/drivers/net/sfc/sfc_sw_stats.c
+++ b/drivers/net/sfc/sfc_sw_stats.c
@@ -10,12 +10,17 @@
 #include "sfc_tx.h"
 #include "sfc_sw_stats.h"
 
+#define SFC_SW_STAT_INVALIDUINT64_MAX
+
+#define SFC_SW_STATS_GROUP_SIZE_MAX1U
+
 enum sfc_sw_stats_type {
SFC_SW_STATS_RX,
SFC_SW_STATS_TX,
 };
 
-typedef uint64_t sfc_get_sw_stat_val_t(struct sfc_adapter *sa, uint16_t qid);
+typedef void sfc_get_sw_stat_val_t(struct sfc_adapter *sa, uint16_t qid,
+  uint64_t *values, unsigned int values_count);
 
 struct sfc_sw_stat_descr {
const char *name;
@@ -25,31 +30,41 @@ struct sfc_sw_stat_descr {
 };
 
 static sfc_get_sw_stat_val_t sfc_get_sw_stat_val_rx_dbells;
-static uint64_t
-sfc_get_sw_stat_val_rx_dbells(struct sfc_adapter *sa, uint16_t qid)
+static void
+sfc_get_sw_stat_val_rx_dbells(struct sfc_adapter *sa, uint16_t qid,
+  uint64_t *values, unsigned int values_count)
 {
struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
struct sfc_rxq_info *rxq_info;
 
+   RTE_SET_USED(values_count);
+   SFC_ASSERT(values_count == 1);
rxq_info = sfc_rxq_info_by_ethdev_qid(sas, qid);
-   if (rxq_info->state & SFC_RXQ_INITIALIZED)
-   return rxq_info->dp->dpq.rx_dbells;
-   return 0;
+   values[0] = rxq_info->state & SFC_RXQ_INITIALIZED ?
+   rxq_info->dp->dpq.rx_dbells : 0;
 }
 
 static sfc_get_sw_stat_val_t sfc_get_sw_stat_val_tx_dbells;
-static uint64_t
-sfc_get_sw_stat_val_tx_dbells(struct sfc_adapter *sa, uint16_t qid)
+static void
+sfc_get_sw_stat_val_tx_dbells(struct sfc_adapter *sa, uint16_t qid,
+  uint64_t *values, unsigned int values_count)
 {
struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
struct sfc_txq_info *txq_info;
 
+   RTE_SET_USED(values_count);
+   SFC_ASSERT(values_count == 1);
txq_info = sfc_txq_info_by_ethdev_qid(sas, qid);
-   if (txq_info->state & SFC_TXQ_INITIALIZED)
-   return txq_info->dp->dpq.tx_dbells;
-   return 0;
+   values[0] = txq_info->state & SFC_TXQ_INITIALIZED ?
+   txq_info->dp->dpq.tx_dbells : 0;
 }
 
+/*
+ * SW stats can be grouped together. When stats are grouped the corresponding
+ * stats values for each queue are obtained during calling one get value
+ * callback. Stats of the same group are contiguous in the structure below.
+ * The start of the group is denoted by stat implementing get value callback.
+ */
 const struct sfc_sw_stat_descr sfc_sw_stats_descr[] = {
{
.name = "dbells",
@@ -228,9 +243,53 @@ sfc_sw_xstat_get_names_by_id(struct sfc_adapter *sa,
return 0;
 }
 
+static uint64_t
+sfc_sw_stat_get_val(struct sfc_adapter *sa,
+   unsigned int sw_stat_idx, uint16_t qid)
+{
+   struct sfc_sw_stats *sw_stats = &sa->sw_stats;
+   uint64_t *res = &sw_stats->supp[sw_stat_idx].cache[qid];
+   uint64_t values[SFC_SW_STATS_GROUP_SIZE_MAX];
+   unsigned int group_start_idx;
+   unsigned int group_size;
+   unsigned int i;
+
+   if (*res != SFC_SW_STAT_INVALID)
+   return *res;
+
+   /*
+* Search for the group start, i.e. the stat that implements
+

[dpdk-dev] [PATCH 10/11] net/sfc: collect per queue stats in EF100 Rx datapath

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

If Rx datapath collects per queue statistics, use these stats
to provide ipackets and ibytes in basic ethdev stats.

Signed-off-by: Andrew Rybchenko 
Signed-off-by: Ivan Ilchenko 
---
 drivers/net/sfc/sfc.h  |   3 +
 drivers/net/sfc/sfc_dp.h   |   2 +
 drivers/net/sfc/sfc_dp_rx.h|   1 +
 drivers/net/sfc/sfc_ef100_rx.c |  10 ++-
 drivers/net/sfc/sfc_ethdev.c   |  72 -
 drivers/net/sfc/sfc_sw_stats.c | 115 -
 6 files changed, 182 insertions(+), 21 deletions(-)

diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h
index 30679014e3..30bd109e8b 100644
--- a/drivers/net/sfc/sfc.h
+++ b/drivers/net/sfc/sfc.h
@@ -236,6 +236,9 @@ struct sfc_sw_stats {
unsigned intcache_count;
 
uint64_t*reset_vals;
+   /* Location of per-queue reset values for packets/bytes in reset_vals */
+   uint64_t*reset_rx_pkts;
+   uint64_t*reset_rx_bytes;
 
rte_spinlock_t  queues_bitmap_lock;
void*queues_bitmap_mem;
diff --git a/drivers/net/sfc/sfc_dp.h b/drivers/net/sfc/sfc_dp.h
index 7fd8f34b0f..2edde61a68 100644
--- a/drivers/net/sfc/sfc_dp.h
+++ b/drivers/net/sfc/sfc_dp.h
@@ -16,6 +16,7 @@
 #include 
 
 #include "sfc_log.h"
+#include "sfc_stats.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -49,6 +50,7 @@ struct sfc_dp_queue {
 * put stats at top of the structure to be closer to fields
 * used on datapath or reap to have more chances to be cache-hot.
 */
+   union sfc_pkts_bytesstats;
uint32_trx_dbells;
uint32_ttx_dbells;
 
diff --git a/drivers/net/sfc/sfc_dp_rx.h b/drivers/net/sfc/sfc_dp_rx.h
index b6c44085ce..d037acaa56 100644
--- a/drivers/net/sfc/sfc_dp_rx.h
+++ b/drivers/net/sfc/sfc_dp_rx.h
@@ -216,6 +216,7 @@ struct sfc_dp_rx {
 #define SFC_DP_RX_FEAT_FLOW_FLAG   0x2
 #define SFC_DP_RX_FEAT_FLOW_MARK   0x4
 #define SFC_DP_RX_FEAT_INTR0x8
+#define SFC_DP_RX_FEAT_STATS   0x10
/**
 * Rx offload capabilities supported by the datapath on device
 * level only if HW/FW supports it.
diff --git a/drivers/net/sfc/sfc_ef100_rx.c b/drivers/net/sfc/sfc_ef100_rx.c
index 1bf04f565a..391c52487d 100644
--- a/drivers/net/sfc/sfc_ef100_rx.c
+++ b/drivers/net/sfc/sfc_ef100_rx.c
@@ -525,10 +525,13 @@ sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
lastseg = seg;
}
 
-   if (likely(deliver))
+   if (likely(deliver)) {
*rx_pkts++ = pkt;
-   else
+   sfc_pkts_bytes_add(&rxq->dp.dpq.stats, 1,
+  rte_pktmbuf_pkt_len(pkt));
+   } else {
rte_pktmbuf_free(pkt);
+   }
}
 
return rx_pkts;
@@ -914,7 +917,8 @@ struct sfc_dp_rx sfc_ef100_rx = {
.hw_fw_caps = SFC_DP_HW_FW_CAP_EF100,
},
.features   = SFC_DP_RX_FEAT_MULTI_PROCESS |
- SFC_DP_RX_FEAT_INTR,
+ SFC_DP_RX_FEAT_INTR |
+ SFC_DP_RX_FEAT_STATS,
.dev_offload_capa   = 0,
.queue_offload_capa = DEV_RX_OFFLOAD_CHECKSUM |
  DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
diff --git a/drivers/net/sfc/sfc_ethdev.c b/drivers/net/sfc/sfc_ethdev.c
index 2db0d000c3..20d808d15c 100644
--- a/drivers/net/sfc/sfc_ethdev.c
+++ b/drivers/net/sfc/sfc_ethdev.c
@@ -586,6 +586,33 @@ sfc_tx_queue_release(void *queue)
sfc_adapter_unlock(sa);
 }
 
+static void
+sfc_stats_get_dp_rx(struct sfc_adapter *sa, uint64_t *pkts, uint64_t *bytes)
+{
+   struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
+   uint64_t pkts_sum = 0;
+   uint64_t bytes_sum = 0;
+   unsigned int i;
+
+   for (i = 0; i < sas->ethdev_rxq_count; ++i) {
+   struct sfc_rxq_info *rxq_info;
+
+   rxq_info = sfc_rxq_info_by_ethdev_qid(sas, i);
+   if (rxq_info->state & SFC_RXQ_INITIALIZED) {
+   union sfc_pkts_bytes qstats;
+
+   sfc_pkts_bytes_get(&rxq_info->dp->dpq.stats, &qstats);
+   pkts_sum += qstats.pkts -
+   sa->sw_stats.reset_rx_pkts[i];
+   bytes_sum += qstats.bytes -
+   sa->sw_stats.reset_rx_bytes[i];
+   }
+   }
+
+   *pkts = pkts_sum;
+   *bytes = bytes_sum;
+}
+
 /*
  * Some statistics are computed as A - B where A and B each increase
  * monotonically with some hardware counter(s) and the counters are read
@@ -612,6 +639,8 @@ sfc_update_diff_stat(uint64_t *s

[dpdk-dev] [PATCH 11/11] net/sfc: collect per queue stats in EF100 Tx datapath

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

If Tx datapath collects per queue statistics, use these stats
to provide opackets and obytes in basic ethdev stats.

Signed-off-by: Andrew Rybchenko 
Signed-off-by: Ivan Ilchenko 
---
 drivers/net/sfc/sfc.h  |  2 ++
 drivers/net/sfc/sfc_dp_tx.h|  1 +
 drivers/net/sfc/sfc_ef100_tx.c |  6 +++-
 drivers/net/sfc/sfc_ethdev.c   | 64 +-
 drivers/net/sfc/sfc_sw_stats.c | 48 +
 5 files changed, 104 insertions(+), 17 deletions(-)

diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h
index 30bd109e8b..ace66d435c 100644
--- a/drivers/net/sfc/sfc.h
+++ b/drivers/net/sfc/sfc.h
@@ -239,6 +239,8 @@ struct sfc_sw_stats {
/* Location of per-queue reset values for packets/bytes in reset_vals */
uint64_t*reset_rx_pkts;
uint64_t*reset_rx_bytes;
+   uint64_t*reset_tx_pkts;
+   uint64_t*reset_tx_bytes;
 
rte_spinlock_t  queues_bitmap_lock;
void*queues_bitmap_mem;
diff --git a/drivers/net/sfc/sfc_dp_tx.h b/drivers/net/sfc/sfc_dp_tx.h
index 777807985b..184711b887 100644
--- a/drivers/net/sfc/sfc_dp_tx.h
+++ b/drivers/net/sfc/sfc_dp_tx.h
@@ -168,6 +168,7 @@ struct sfc_dp_tx {
 
unsigned intfeatures;
 #define SFC_DP_TX_FEAT_MULTI_PROCESS   0x1
+#define SFC_DP_TX_FEAT_STATS   0x2
/**
 * Tx offload capabilities supported by the datapath on device
 * level only if HW/FW supports it.
diff --git a/drivers/net/sfc/sfc_ef100_tx.c b/drivers/net/sfc/sfc_ef100_tx.c
index 522e9a0d34..fce82795cc 100644
--- a/drivers/net/sfc/sfc_ef100_tx.c
+++ b/drivers/net/sfc/sfc_ef100_tx.c
@@ -710,6 +710,9 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
}
 
dma_desc_space -= (added - pkt_start);
+
+   sfc_pkts_bytes_add(&txq->dp.dpq.stats, 1,
+  rte_pktmbuf_pkt_len(*pktp));
}
 
if (likely(added != txq->added)) {
@@ -940,7 +943,8 @@ struct sfc_dp_tx sfc_ef100_tx = {
.type   = SFC_DP_TX,
.hw_fw_caps = SFC_DP_HW_FW_CAP_EF100,
},
-   .features   = SFC_DP_TX_FEAT_MULTI_PROCESS,
+   .features   = SFC_DP_TX_FEAT_MULTI_PROCESS |
+ SFC_DP_TX_FEAT_STATS,
.dev_offload_capa   = 0,
.queue_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
  DEV_TX_OFFLOAD_IPV4_CKSUM |
diff --git a/drivers/net/sfc/sfc_ethdev.c b/drivers/net/sfc/sfc_ethdev.c
index 20d808d15c..fac9b27974 100644
--- a/drivers/net/sfc/sfc_ethdev.c
+++ b/drivers/net/sfc/sfc_ethdev.c
@@ -613,6 +613,33 @@ sfc_stats_get_dp_rx(struct sfc_adapter *sa, uint64_t 
*pkts, uint64_t *bytes)
*bytes = bytes_sum;
 }
 
+static void
+sfc_stats_get_dp_tx(struct sfc_adapter *sa, uint64_t *pkts, uint64_t *bytes)
+{
+   struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
+   uint64_t pkts_sum = 0;
+   uint64_t bytes_sum = 0;
+   unsigned int i;
+
+   for (i = 0; i < sas->ethdev_txq_count; ++i) {
+   struct sfc_txq_info *txq_info;
+
+   txq_info = sfc_txq_info_by_ethdev_qid(sas, i);
+   if (txq_info->state & SFC_TXQ_INITIALIZED) {
+   union sfc_pkts_bytes qstats;
+
+   sfc_pkts_bytes_get(&txq_info->dp->dpq.stats, &qstats);
+   pkts_sum += qstats.pkts -
+   sa->sw_stats.reset_tx_pkts[i];
+   bytes_sum += qstats.bytes -
+   sa->sw_stats.reset_tx_bytes[i];
+   }
+   }
+
+   *pkts = pkts_sum;
+   *bytes = bytes_sum;
+}
+
 /*
  * Some statistics are computed as A - B where A and B each increase
  * monotonically with some hardware counter(s) and the counters are read
@@ -641,6 +668,7 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats 
*stats)
 {
const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
bool have_dp_rx_stats = sap->dp_rx->features & SFC_DP_RX_FEAT_STATS;
+   bool have_dp_tx_stats = sap->dp_tx->features & SFC_DP_TX_FEAT_STATS;
struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
struct sfc_port *port = &sa->port;
uint64_t *mac_stats;
@@ -650,6 +678,8 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats 
*stats)
 
if (have_dp_rx_stats)
sfc_stats_get_dp_rx(sa, &stats->ipackets, &stats->ibytes);
+   if (have_dp_tx_stats)
+   sfc_stats_get_dp_tx(sa, &stats->opackets, &stats->obytes);
 
ret = sfc_port_update_mac_stats(sa, B_FALSE);
if (ret != 0)
@@ -672,25 +702,27 @@ sfc_stats_get(struct rte_eth_dev *dev, struct 
rte_eth_stats *sta

Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx queue

2021-09-28 Thread Xueming(Steven) Li
On Tue, 2021-09-28 at 15:05 +0530, Jerin Jacob wrote:
> On Sun, Sep 26, 2021 at 11:06 AM Xueming(Steven) Li  
> wrote:
> > 
> > On Wed, 2021-08-11 at 13:04 +0100, Ferruh Yigit wrote:
> > > On 8/11/2021 9:28 AM, Xueming(Steven) Li wrote:
> > > > 
> > > > 
> > > > > -Original Message-
> > > > > From: Jerin Jacob 
> > > > > Sent: Wednesday, August 11, 2021 4:03 PM
> > > > > To: Xueming(Steven) Li 
> > > > > Cc: dpdk-dev ; Ferruh Yigit ; 
> > > > > NBU-Contact-Thomas Monjalon ;
> > > > > Andrew Rybchenko 
> > > > > Subject: Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx queue
> > > > > 
> > > > > On Mon, Aug 9, 2021 at 7:46 PM Xueming(Steven) Li 
> > > > >  wrote:
> > > > > > 
> > > > > > Hi,
> > > > > > 
> > > > > > > -Original Message-
> > > > > > > From: Jerin Jacob 
> > > > > > > Sent: Monday, August 9, 2021 9:51 PM
> > > > > > > To: Xueming(Steven) Li 
> > > > > > > Cc: dpdk-dev ; Ferruh Yigit 
> > > > > > > ;
> > > > > > > NBU-Contact-Thomas Monjalon ; Andrew 
> > > > > > > Rybchenko
> > > > > > > 
> > > > > > > Subject: Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx 
> > > > > > > queue
> > > > > > > 
> > > > > > > On Mon, Aug 9, 2021 at 5:18 PM Xueming Li  
> > > > > > > wrote:
> > > > > > > > 
> > > > > > > > In current DPDK framework, each RX queue is pre-loaded with 
> > > > > > > > mbufs
> > > > > > > > for incoming packets. When number of representors scale out in a
> > > > > > > > switch domain, the memory consumption became significant. Most
> > > > > > > > important, polling all ports leads to high cache miss, high
> > > > > > > > latency and low throughput.
> > > > > > > > 
> > > > > > > > This patch introduces shared RX queue. Ports with same
> > > > > > > > configuration in a switch domain could share RX queue set by 
> > > > > > > > specifying sharing group.
> > > > > > > > Polling any queue using same shared RX queue receives packets 
> > > > > > > > from
> > > > > > > > all member ports. Source port is identified by mbuf->port.
> > > > > > > > 
> > > > > > > > Port queue number in a shared group should be identical. Queue
> > > > > > > > index is
> > > > > > > > 1:1 mapped in shared group.
> > > > > > > > 
> > > > > > > > Share RX queue is supposed to be polled on same thread.
> > > > > > > > 
> > > > > > > > Multiple groups is supported by group ID.
> > > > > > > 
> > > > > > > Is this offload specific to the representor? If so can this name 
> > > > > > > be changed specifically to representor?
> > > > > > 
> > > > > > Yes, PF and representor in switch domain could take advantage.
> > > > > > 
> > > > > > > If it is for a generic case, how the flow ordering will be 
> > > > > > > maintained?
> > > > > > 
> > > > > > Not quite sure that I understood your question. The control path of 
> > > > > > is
> > > > > > almost same as before, PF and representor port still needed, rte 
> > > > > > flows not impacted.
> > > > > > Queues still needed for each member port, descriptors(mbuf) will be
> > > > > > supplied from shared Rx queue in my PMD implementation.
> > > > > 
> > > > > My question was if create a generic RTE_ETH_RX_OFFLOAD_SHARED_RXQ 
> > > > > offload, multiple ethdev receive queues land into the same
> > > > > receive queue, In that case, how the flow order is maintained for 
> > > > > respective receive queues.
> > > > 
> > > > I guess the question is testpmd forward stream? The forwarding logic 
> > > > has to be changed slightly in case of shared rxq.
> > > > basically for each packet in rx_burst result, lookup source stream 
> > > > according to mbuf->port, forwarding to target fs.
> > > > Packets from same source port could be grouped as a small burst to 
> > > > process; this will accelerate the performance if traffic comes from
> > > > limited ports. I'll introduce some common api to do shared rxq 
> > > > forwarding, call it with packets handling callback, so it suits 
> > > > all forwarding engines. Will send patches soon.
> > > > 
> > > 
> > > All ports will put the packets into the same queue (share queue), right? 
> > > Does
> > > this means only single core will poll only, what will happen if there are
> > > multiple cores polling, won't it cause problem?
> > > 
> > > And if this requires specific changes in the application, I am not sure 
> > > about
> > > the solution, can't this work in a transparent way to the application?
> > 
> > Discussed with Jerin, new API introduced in v3 2/8 that aggregate ports
> > in same group into one new port. Users could schedule polling on the
> > aggregated port instead of all member ports.
> 
> The v3 still has testpmd changes in fastpath. Right? IMO, For this
> feature, we should not change fastpath of testpmd
> application. Instead, testpmd can use aggregated ports probably as
> separate fwd_engine to show how to use this feature.

Good point to discuss :) There are two strategies to polling a shared
Rxq:
1. polling each member port
   All forwarding engines can be reused to work as before. 
   My 

Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx queue

2021-09-28 Thread Xueming(Steven) Li
On Tue, 2021-09-28 at 15:05 +0530, Jerin Jacob wrote:
> On Sun, Sep 26, 2021 at 11:06 AM Xueming(Steven) Li  
> wrote:
> > 
> > On Wed, 2021-08-11 at 13:04 +0100, Ferruh Yigit wrote:
> > > On 8/11/2021 9:28 AM, Xueming(Steven) Li wrote:
> > > > 
> > > > 
> > > > > -Original Message-
> > > > > From: Jerin Jacob 
> > > > > Sent: Wednesday, August 11, 2021 4:03 PM
> > > > > To: Xueming(Steven) Li 
> > > > > Cc: dpdk-dev ; Ferruh Yigit ; 
> > > > > NBU-Contact-Thomas Monjalon ;
> > > > > Andrew Rybchenko 
> > > > > Subject: Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx queue
> > > > > 
> > > > > On Mon, Aug 9, 2021 at 7:46 PM Xueming(Steven) Li 
> > > > >  wrote:
> > > > > > 
> > > > > > Hi,
> > > > > > 
> > > > > > > -Original Message-
> > > > > > > From: Jerin Jacob 
> > > > > > > Sent: Monday, August 9, 2021 9:51 PM
> > > > > > > To: Xueming(Steven) Li 
> > > > > > > Cc: dpdk-dev ; Ferruh Yigit 
> > > > > > > ;
> > > > > > > NBU-Contact-Thomas Monjalon ; Andrew 
> > > > > > > Rybchenko
> > > > > > > 
> > > > > > > Subject: Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx 
> > > > > > > queue
> > > > > > > 
> > > > > > > On Mon, Aug 9, 2021 at 5:18 PM Xueming Li  
> > > > > > > wrote:
> > > > > > > > 
> > > > > > > > In current DPDK framework, each RX queue is pre-loaded with 
> > > > > > > > mbufs
> > > > > > > > for incoming packets. When number of representors scale out in a
> > > > > > > > switch domain, the memory consumption became significant. Most
> > > > > > > > important, polling all ports leads to high cache miss, high
> > > > > > > > latency and low throughput.
> > > > > > > > 
> > > > > > > > This patch introduces shared RX queue. Ports with same
> > > > > > > > configuration in a switch domain could share RX queue set by 
> > > > > > > > specifying sharing group.
> > > > > > > > Polling any queue using same shared RX queue receives packets 
> > > > > > > > from
> > > > > > > > all member ports. Source port is identified by mbuf->port.
> > > > > > > > 
> > > > > > > > Port queue number in a shared group should be identical. Queue
> > > > > > > > index is
> > > > > > > > 1:1 mapped in shared group.
> > > > > > > > 
> > > > > > > > Share RX queue is supposed to be polled on same thread.
> > > > > > > > 
> > > > > > > > Multiple groups is supported by group ID.
> > > > > > > 
> > > > > > > Is this offload specific to the representor? If so can this name 
> > > > > > > be changed specifically to representor?
> > > > > > 
> > > > > > Yes, PF and representor in switch domain could take advantage.
> > > > > > 
> > > > > > > If it is for a generic case, how the flow ordering will be 
> > > > > > > maintained?
> > > > > > 
> > > > > > Not quite sure that I understood your question. The control path of 
> > > > > > is
> > > > > > almost same as before, PF and representor port still needed, rte 
> > > > > > flows not impacted.
> > > > > > Queues still needed for each member port, descriptors(mbuf) will be
> > > > > > supplied from shared Rx queue in my PMD implementation.
> > > > > 
> > > > > My question was if create a generic RTE_ETH_RX_OFFLOAD_SHARED_RXQ 
> > > > > offload, multiple ethdev receive queues land into the same
> > > > > receive queue, In that case, how the flow order is maintained for 
> > > > > respective receive queues.
> > > > 
> > > > I guess the question is testpmd forward stream? The forwarding logic 
> > > > has to be changed slightly in case of shared rxq.
> > > > basically for each packet in rx_burst result, lookup source stream 
> > > > according to mbuf->port, forwarding to target fs.
> > > > Packets from same source port could be grouped as a small burst to 
> > > > process; this will accelerate the performance if traffic comes from
> > > > limited ports. I'll introduce some common api to do shared rxq 
> > > > forwarding, call it with packets handling callback, so it suits 
> > > > all forwarding engines. Will send patches soon.
> > > > 
> > > 
> > > All ports will put the packets into the same queue (share queue), right? 
> > > Does
> > > this means only single core will poll only, what will happen if there are
> > > multiple cores polling, won't it cause problem?
> > > 
> > > And if this requires specific changes in the application, I am not sure 
> > > about
> > > the solution, can't this work in a transparent way to the application?
> > 
> > Discussed with Jerin, new API introduced in v3 2/8 that aggregate ports
> > in same group into one new port. Users could schedule polling on the
> > aggregated port instead of all member ports.
> 
> The v3 still has testpmd changes in fastpath. Right? IMO, For this
> feature, we should not change fastpath of testpmd
> application. Instead, testpmd can use aggregated ports probably as
> separate fwd_engine to show how to use this feature.

Good point to discuss :) There are two strategies to polling a shared
Rxq:
1. polling each member port
   All forwarding engines can be reused to work as before. 
   My 

Re: [dpdk-dev] [PATCH v5 2/2] examples/vhost: use API to check inflight packets

2021-09-28 Thread Ding, Xuan
Hi Kevin,

> -Original Message-
> From: Kevin Traynor 
> Sent: Tuesday, September 28, 2021 5:18 PM
> To: Ding, Xuan ; dev@dpdk.org;
> maxime.coque...@redhat.com; Xia, Chenbo 
> Cc: Hu, Jiayu ; Jiang, Cheng1 ;
> Richardson, Bruce ; Pai G, Sunil
> ; Wang, Yinan ; Yang, YvonneX
> 
> Subject: Re: [PATCH v5 2/2] examples/vhost: use API to check inflight packets
> 
> On 28/09/2021 07:24, Xuan Ding wrote:
> > In async data path, call rte_vhost_async_get_inflight_thread_unsafe()
> > API to directly return the number of inflight packets instead of
> > maintaining a local variable.
> >
> > Signed-off-by: Xuan Ding 
> > ---
> >   examples/vhost/main.c | 25 +++--
> >   examples/vhost/main.h |  1 -
> >   2 files changed, 11 insertions(+), 15 deletions(-)
> >
> > diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> > index d0bf1f31e3..3faac6d053 100644
> > --- a/examples/vhost/main.c
> > +++ b/examples/vhost/main.c
> > @@ -842,11 +842,8 @@ complete_async_pkts(struct vhost_dev *vdev)
> >
> > complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> > VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
> > -   if (complete_count) {
> > +   if (complete_count)
> > free_pkts(p_cpl, complete_count);
> > -   __atomic_sub_fetch(&vdev->pkts_inflight, complete_count,
> __ATOMIC_SEQ_CST);
> > -   }
> > -
> >   }
> >
> >   static __rte_always_inline void
> > @@ -886,7 +883,6 @@ drain_vhost(struct vhost_dev *vdev)
> >
> > complete_async_pkts(vdev);
> > ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> m, nr_xmit);
> > -   __atomic_add_fetch(&vdev->pkts_inflight, ret,
> __ATOMIC_SEQ_CST);
> >
> > enqueue_fail = nr_xmit - ret;
> > if (enqueue_fail)
> > @@ -1212,7 +1208,6 @@ drain_eth_rx(struct vhost_dev *vdev)
> > complete_async_pkts(vdev);
> > enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
> > VIRTIO_RXQ, pkts, rx_count);
> > -   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count,
> __ATOMIC_SEQ_CST);
> >
> > enqueue_fail = rx_count - enqueue_count;
> > if (enqueue_fail)
> > @@ -1338,6 +1333,7 @@ destroy_device(int vid)
> > struct vhost_dev *vdev = NULL;
> > int lcore;
> > uint16_t i;
> 
> > +   int pkts_inflight;
> 
> You can move this down to the block it is used in

Thanks for the suggestion.
I consider calling the unsafe API in while (condition), and there is no need to 
define this variable.

> 
> >
> > TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
> > if (vdev->vid == vid)
> > @@ -1384,13 +1380,13 @@ destroy_device(int vid)
> >
> > if (async_vhost_driver) {
> > uint16_t n_pkt = 0;
> > -   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> > +   pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid,
> VIRTIO_RXQ);
> > +   struct rte_mbuf *m_cpl[pkts_inflight];
> >
> > -   while (vdev->pkts_inflight) {
> > +   while (pkts_inflight) {
> > n_pkt = rte_vhost_clear_queue_thread_unsafe(vid,
> VIRTIO_RXQ,
> > -   m_cpl, vdev->pkts_inflight);
> > +   m_cpl, pkts_inflight);
> > free_pkts(m_cpl, n_pkt);
> > -   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt,
> __ATOMIC_SEQ_CST);
> 
> This is an infinite loop if there are pkts_inflight, need to recheck
> pkts_inflight in the loop.

Thanks for the catch, will call the unsafe API directly in the while 
(condition).

> 
> > }
> >
> > rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
> > @@ -1486,6 +1482,7 @@ static int
> >   vring_state_changed(int vid, uint16_t queue_id, int enable)
> >   {
> > struct vhost_dev *vdev = NULL;
> > +   int pkts_inflight;
> >
> > TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
> > if (vdev->vid == vid)
> > @@ -1500,13 +1497,13 @@ vring_state_changed(int vid, uint16_t queue_id,
> int enable)
> > if (async_vhost_driver) {
> > if (!enable) {
> > uint16_t n_pkt = 0;
> > -   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> > +   pkts_inflight =
> rte_vhost_async_get_inflight_thread_unsafe(vid, queue_id);
> > +   struct rte_mbuf *m_cpl[pkts_inflight];
> >
> > -   while (vdev->pkts_inflight) {
> > +   while (pkts_inflight) {
> > n_pkt =
> rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
> > -   m_cpl, vdev-
> >pkts_inflight);
> > +   m_cpl, pkts_inflight);
> > free_pkts(m_cpl, n_pkt);
> > -   __atomic_sub_fetch(&vdev->pkts_inflight,
> n_pk

Re: [dpdk-dev] [PATCH] app/testpmd: update raw flow to take hex input

2021-09-28 Thread Nipun Gupta
Please ignore this patch for now.

Regards,
Nipun

> -Original Message-
> From: nipun.gu...@nxp.com 
> Sent: Tuesday, September 28, 2021 4:39 PM
> To: dev@dpdk.org
> Cc: xiaoyun...@intel.com; or...@nvidia.com; tho...@monjalon.net;
> ferruh.yi...@intel.com; Hemant Agrawal ; Nipun
> Gupta 
> Subject: [PATCH] app/testpmd: update raw flow to take hex input
> 
> From: Nipun Gupta 
> 
> This patch enables method to provide key and mask for raw rules
> to be provided as hexadecimal values. There is new parameter
> pattern_mask added to support this.
> 
> Signed-off-by: Nipun Gupta 
> ---
>  app/test-pmd/cmdline_flow.c | 15 +++
>  doc/guides/testpmd_app_ug/testpmd_funcs.rst | 13 +
>  2 files changed, 28 insertions(+)
> 
> diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
> index 6cd99bf37f..a95b147d92 100644
> --- a/app/test-pmd/cmdline_flow.c
> +++ b/app/test-pmd/cmdline_flow.c
> @@ -158,6 +158,7 @@ enum index {
>   ITEM_RAW_OFFSET,
>   ITEM_RAW_LIMIT,
>   ITEM_RAW_PATTERN,
> + ITEM_RAW_PATTERN_HEX,
>   ITEM_ETH,
>   ITEM_ETH_DST,
>   ITEM_ETH_SRC,
> @@ -1046,6 +1047,7 @@ static const enum index item_raw[] = {
>   ITEM_RAW_OFFSET,
>   ITEM_RAW_LIMIT,
>   ITEM_RAW_PATTERN,
> + ITEM_RAW_PATTERN_HEX,
>   ITEM_NEXT,
>   ZERO,
>  };
> @@ -2487,6 +2489,19 @@ static const struct token token_list[] = {
>ARGS_ENTRY_ARB(sizeof(struct rte_flow_item_raw),
>   ITEM_RAW_PATTERN_SIZE)),
>   },
> + [ITEM_RAW_PATTERN_HEX] = {
> + .name = "pattern_hex",
> + .help = "hex string to look for",
> + .next = NEXT(item_raw,
> +  NEXT_ENTRY(HEX),
> +  NEXT_ENTRY(ITEM_PARAM_IS,
> + ITEM_PARAM_SPEC,
> + ITEM_PARAM_MASK)),
> + .args = ARGS(ARGS_ENTRY(struct rte_flow_item_raw, pattern),
> +  ARGS_ENTRY(struct rte_flow_item_raw, length),
> +  ARGS_ENTRY_ARB(sizeof(struct rte_flow_item_raw),
> + ITEM_RAW_PATTERN_SIZE)),
> + },
>   [ITEM_ETH] = {
>   .name = "eth",
>   .help = "match Ethernet header",
> diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> index 4f8751be1c..3a69d37037 100644
> --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> @@ -3637,6 +3637,7 @@ This section lists supported pattern items and their
> attributes, if any.
>- ``offset {integer}``: absolute or relative offset for pattern.
>- ``limit {unsigned}``: search area limit for start of pattern.
>- ``pattern {string}``: byte string to look for.
> +  - ``pattern_hex {string}``: byte string (provided in hexadecimal) to look 
> for.
> 
>  - ``eth``: match Ethernet header.
> 
> @@ -5036,6 +5037,18 @@ The meter policy action list: ``green -> green, yellow
> -> yellow, red -> red``.
> testpmd> create port meter 0 1 13 1 yes 0x 0 0
> testpmd> flow create 0 priority 0 ingress group 1 pattern eth / end 
> actions
> meter mtr_id 1 / end
> 
> +Sample RAW rule
> +~~~
> +
> +A RAW rule can be created as follows using ``pattern_hex`` key and mask.
> +
> +::
> +
> +testpmd> flow create 0 group 0 priority 1 ingress pattern raw relative 
> is 0
> search is 0 offset
> + is 0 limit is 0 pattern_hex spec
> 0a0a0a0a
> + pattern_hex mask
>  / end
> actions
> + queue index 4 / end
> +
>  BPF Functions
>  --
> 
> --
> 2.17.1



Re: [dpdk-dev] [PATCH v5 2/2] examples/vhost: use API to check inflight packets

2021-09-28 Thread Ding, Xuan
Hi,

> -Original Message-
> From: Ding, Xuan
> Sent: Tuesday, September 28, 2021 7:51 PM
> To: Kevin Traynor ; dev@dpdk.org;
> maxime.coque...@redhat.com; Xia, Chenbo 
> Cc: Hu, Jiayu ; Jiang, Cheng1 ;
> Richardson, Bruce ; Pai G, Sunil
> ; Wang, Yinan ; Yang,
> YvonneX 
> Subject: RE: [PATCH v5 2/2] examples/vhost: use API to check inflight packets
> 
> Hi Kevin,
> 
> > -Original Message-
> > From: Kevin Traynor 
> > Sent: Tuesday, September 28, 2021 5:18 PM
> > To: Ding, Xuan ; dev@dpdk.org;
> > maxime.coque...@redhat.com; Xia, Chenbo 
> > Cc: Hu, Jiayu ; Jiang, Cheng1 ;
> > Richardson, Bruce ; Pai G, Sunil
> > ; Wang, Yinan ; Yang,
> YvonneX
> > 
> > Subject: Re: [PATCH v5 2/2] examples/vhost: use API to check inflight 
> > packets
> >
> > On 28/09/2021 07:24, Xuan Ding wrote:
> > > In async data path, call rte_vhost_async_get_inflight_thread_unsafe()
> > > API to directly return the number of inflight packets instead of
> > > maintaining a local variable.
> > >
> > > Signed-off-by: Xuan Ding 
> > > ---
> > >   examples/vhost/main.c | 25 +++--
> > >   examples/vhost/main.h |  1 -
> > >   2 files changed, 11 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> > > index d0bf1f31e3..3faac6d053 100644
> > > --- a/examples/vhost/main.c
> > > +++ b/examples/vhost/main.c
> > > @@ -842,11 +842,8 @@ complete_async_pkts(struct vhost_dev *vdev)
> > >
> > >   complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> > >   VIRTIO_RXQ, p_cpl, 
> > > MAX_PKT_BURST);
> > > - if (complete_count) {
> > > + if (complete_count)
> > >   free_pkts(p_cpl, complete_count);
> > > - __atomic_sub_fetch(&vdev->pkts_inflight, complete_count,
> > __ATOMIC_SEQ_CST);
> > > - }
> > > -
> > >   }
> > >
> > >   static __rte_always_inline void
> > > @@ -886,7 +883,6 @@ drain_vhost(struct vhost_dev *vdev)
> > >
> > >   complete_async_pkts(vdev);
> > >   ret = rte_vhost_submit_enqueue_burst(vdev->vid, 
> > > VIRTIO_RXQ,
> > m, nr_xmit);
> > > - __atomic_add_fetch(&vdev->pkts_inflight, ret,
> > __ATOMIC_SEQ_CST);
> > >
> > >   enqueue_fail = nr_xmit - ret;
> > >   if (enqueue_fail)
> > > @@ -1212,7 +1208,6 @@ drain_eth_rx(struct vhost_dev *vdev)
> > >   complete_async_pkts(vdev);
> > >   enqueue_count = 
> > > rte_vhost_submit_enqueue_burst(vdev->vid,
> > >   VIRTIO_RXQ, pkts, rx_count);
> > > - __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count,
> > __ATOMIC_SEQ_CST);
> > >
> > >   enqueue_fail = rx_count - enqueue_count;
> > >   if (enqueue_fail)
> > > @@ -1338,6 +1333,7 @@ destroy_device(int vid)
> > >   struct vhost_dev *vdev = NULL;
> > >   int lcore;
> > >   uint16_t i;
> >
> > > + int pkts_inflight;
> >
> > You can move this down to the block it is used in
> 
> Thanks for the suggestion.
> I consider calling the unsafe API in while (condition), and there is no need 
> to
> define this variable.
> 
> >
> > >
> > >   TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
> > >   if (vdev->vid == vid)
> > > @@ -1384,13 +1380,13 @@ destroy_device(int vid)
> > >
> > >   if (async_vhost_driver) {
> > >   uint16_t n_pkt = 0;
> > > - struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> > > + pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid,
> > VIRTIO_RXQ);
> > > + struct rte_mbuf *m_cpl[pkts_inflight];
> > >
> > > - while (vdev->pkts_inflight) {
> > > + while (pkts_inflight) {
> > >   n_pkt = rte_vhost_clear_queue_thread_unsafe(vid,
> > VIRTIO_RXQ,
> > > - m_cpl, vdev->pkts_inflight);
> > > + m_cpl, pkts_inflight);
> > >   free_pkts(m_cpl, n_pkt);
> > > - __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt,
> > __ATOMIC_SEQ_CST);
> >
> > This is an infinite loop if there are pkts_inflight, need to recheck
> > pkts_inflight in the loop.
> 
> Thanks for the catch, will call the unsafe API directly in the while 
> (condition).

Sorry for replying myself, as rte_mbuf *m_cpl also needs pkts_inflight here.
Will follow your suggestion, see next version, thanks!

Regards,
Xuan

> 
> >
> > >   }
> > >
> > >   rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
> > > @@ -1486,6 +1482,7 @@ static int
> > >   vring_state_changed(int vid, uint16_t queue_id, int enable)
> > >   {
> > >   struct vhost_dev *vdev = NULL;
> > > + int pkts_inflight;
> > >
> > >   TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
> > >   if (vdev->vid == vid)
> > > @@ -1500,13 +1497,13 @@ vring_state_ch

Re: [dpdk-dev] [PATCH v4 1/2] ethdev: fix docs of functions getting xstats by IDs

2021-09-28 Thread Andrew Rybchenko
Hi Olivier,

I apologize for so long delay with reply. I simply lost it from
my view.

Many thanks for review notes. See below.

On 7/26/21 1:13 PM, Olivier Matz wrote:
> Hi Andrew,
> 
> Some comments below.
> 
> On Sat, Jul 24, 2021 at 03:33:13PM +0300, Andrew Rybchenko wrote:
>> From: Ivan Ilchenko 
>>
>> Document valid combinations of input arguments in accordance with
>> current implementation in ethdev.
>>
>> Fixes: 79c913a42f0 ("ethdev: retrieve xstats by ID")
>> Cc: sta...@dpdk.org
>>
>> Signed-off-by: Ivan Ilchenko 
>> Signed-off-by: Andrew Rybchenko 
>> Reviewed-by: Andy Moreton 
>> ---
>>  lib/ethdev/rte_ethdev.h | 32 +++-
>>  1 file changed, 19 insertions(+), 13 deletions(-)
>>
>> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
>> index d2b27c351f..b14067fe7e 100644
>> --- a/lib/ethdev/rte_ethdev.h
>> +++ b/lib/ethdev/rte_ethdev.h
>> @@ -2872,13 +2872,17 @@ int rte_eth_xstats_get(uint16_t port_id, struct 
>> rte_eth_xstat *xstats,
>>   * @param port_id
>>   *   The port identifier of the Ethernet device.
>>   * @param xstats_names
>> - *   An rte_eth_xstat_name array of at least *size* elements to
>> - *   be filled. If set to NULL, the function returns the required number
>> - *   of elements.
>> + *   Array to be filled in with names of requested device statistics.
>> + *   Must not be NULL if @p ids are specified (not NULL).
>>   * @param ids
>> - *   IDs array given by app to retrieve specific statistics
>> + *   IDs array given by app to retrieve specific statistics. May be NULL to
>> + *   retrieve names of all available statistics or, if @p xstats_names is
>> + *   NULL as well,  just a number of available statistics.
> 
> double spaces before "just"
> 
> "a number" -> "the number"?

Fixed in v5

>>   * @param size
>> - *   The size of the xstats_names array (number of elements).
>> + *   If @p ids is not NULL, number of elements in the array with requested 
>> IDs
>> + *   and number of elements in @p xstats_names to put names in. If @p ids is
>> + *   NULL, number of elements in @p xstats_names to put all available 
>> statistics
>> + *   names in.
> 
> Just a suggestion here, I feel the following description would be clearer:
> 
>   Number of elements in @p xstats_names array (if not NULL) and
>   in @p ids array (if not NULL).

I agree that it is better to avoid here details about
behaviour of one or another array pointer is NULL.
Descriptions of corresponding parameters cover it.

Fixed in v5.

>
> Shouldn't we say that it has to be 0 if both arrays are NULL?

Yes, I think it is useful. Will add in v5.

> 
> Also, the order of arguments is not the same in comment and in
> the function. I think it can make sense to align the comment
> to the prototype.

Fixed in v5.

> 
> 
>>   * @return
>>   *   - A positive value lower or equal to size: success. The return value
>>   * is the number of entries filled in the stats table.
> 
> Not seen in the patch, but right after this line, there is:
> 
>  *   - A positive value higher than size: error, the given statistics table
>  * is too small. The return value corresponds to the size that should
>  * be given to succeed. The entries in the table are not valid and
>  * shall not be used by the caller.
> 
> I wonder if it shouldn't be slighly reworded to remove 'error'. After
> all, passing NULL arrays (and size == 0) is a valid, so the return is
> not an error.

I agree that it should not be treated as an error. It is just a
special case of success when return is partially provided (just
a number of stats).

Fixed in v5.

> 
>> @@ -2886,7 +2890,7 @@ int rte_eth_xstats_get(uint16_t port_id, struct 
>> rte_eth_xstat *xstats,
>>   * is too small. The return value corresponds to the size that should
>>   * be given to succeed. The entries in the table are not valid and
>>   * shall not be used by the caller.
>> - *   - A negative value on error (invalid port id).
>> + *   - A negative value on error.
>>   */
>>  int
>>  rte_eth_xstats_get_names_by_id(uint16_t port_id,
>> @@ -2899,14 +2903,16 @@ rte_eth_xstats_get_names_by_id(uint16_t port_id,
>>   * @param port_id
>>   *   The port identifier of the Ethernet device.
>>   * @param ids
>> - *   A pointer to an ids array passed by application. This tells which
>> - *   statistics values function should retrieve. This parameter
>> - *   can be set to NULL if size is 0. In this case function will retrieve
>> - *   all available statistics.
>> + *   IDs array given by app to retrieve specific statistics. May be NULL to
>> + *   retrieve all available statistics or, if @p values is NULL as well,
>> + *   just a number of available statistics.
>>   * @param values
>> - *   A pointer to a table to be filled with device statistics values.
>> + *   Array to be filled in with requested device statistics.
>> + *   Must not be NULL if ids are specified (not NULL).
>>   * @param size
>> - *   The size of the ids array (number of elements).
>> +

Re: [dpdk-dev] [PATCH v4 2/2] ethdev: fix docs of drivers callbacks getting xstats by IDs

2021-09-28 Thread Andrew Rybchenko
On 7/26/21 1:13 PM, Olivier Matz wrote:
> On Sat, Jul 24, 2021 at 03:33:14PM +0300, Andrew Rybchenko wrote:
>> From: Ivan Ilchenko 
>>
>> Update xstats by IDs callbacks documentation in accordance with
>> ethdev usage of these callbacks. Document valid combinations of
>> input arguments to make driver implementation simpler.
>>
>> Fixes: 79c913a42f0 ("ethdev: retrieve xstats by ID")
>> Cc: sta...@dpdk.org
>>
>> Signed-off-by: Ivan Ilchenko 
>> Signed-off-by: Andrew Rybchenko 
>> Reviewed-by: Andy Moreton 
>> ---
>>  lib/ethdev/ethdev_driver.h | 43 --
>>  1 file changed, 41 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
>> index 40e474aa7e..fd5b7ca550 100644
>> --- a/lib/ethdev/ethdev_driver.h
>> +++ b/lib/ethdev/ethdev_driver.h
>> @@ -187,11 +187,28 @@ typedef int (*eth_xstats_get_t)(struct rte_eth_dev 
>> *dev,
>>  struct rte_eth_xstat *stats, unsigned int n);
>>  /**< @internal Get extended stats of an Ethernet device. */
>>  
>> +/**
>> + * @internal
>> + * Get extended stats of an Ethernet device.
>> + *
>> + * @param dev
>> + *   ethdev handle of port.
>> + * @param ids
>> + *   IDs array to retrieve specific statistics. Must not be NULL.
>> + * @param values
>> + *   A pointer to a table to be filled with device statistics values.
>> + *   Must not be NULL.
>> + * @param n
>> + *   Element count in @p ids and @p values
>> + *
>> + * @return
>> + *   - A number of filled in stats.
>> + *   - A negative value on error.
>> + */
>>  typedef int (*eth_xstats_get_by_id_t)(struct rte_eth_dev *dev,
>>const uint64_t *ids,
>>uint64_t *values,
>>unsigned int n);
>> -/**< @internal Get extended stats of an Ethernet device. */
>>  
>>  /**
>>   * @internal
>> @@ -218,10 +235,32 @@ typedef int (*eth_xstats_get_names_t)(struct 
>> rte_eth_dev *dev,
>>  struct rte_eth_xstat_name *xstats_names, unsigned int size);
>>  /**< @internal Get names of extended stats of an Ethernet device. */
>>  
>> +/**
>> + * @internal
>> + * Get names of extended stats of an Ethernet device.
>> + * For name count, set @p xstats_names and @p ids to NULL.
>> + *
>> + * @param dev
>> + *   ethdev handle of port.
>> + * @param xstats_names
>> + *   An rte_eth_xstat_name array of at least *size* elements to
>> + *   be filled. Can be NULL together with @p ids to retrieve number of
>> + *   available statistics.
>> + * @param ids
>> + *   IDs array to retrieve specific statistics. Can be NULL together
>> + *   with @p xstats_names to retrieve number of available statistics.
>> + * @param size
>> + *   Size of ids and xstats_names arrays.
>> + *   Element count in @p ids and @p xstats_names
> 
> I think only the second line should be kept.

Thanks, fixed in v5.

> 
> Shouldn't we also say here that size should be 0 if arrays are NULL?

In this particular case I don't think so. ethdev layer
guarantees that the parameter is 0 if both array pointers
are NULL. PMD should not care about it.

> 
>> + *
>> + * @return
>> + *   - A number of filled in stats if both xstats_names and ids are not 
>> NULL.
>> + *   - A number of available stats if both xstats_names and ids are NULL.
>> + *   - A negative value on error.
>> + */
>>  typedef int (*eth_xstats_get_names_by_id_t)(struct rte_eth_dev *dev,
>>  struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
>>  unsigned int size);
>> -/**< @internal Get names of extended stats of an Ethernet device. */
>>  
>>  typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev,
>>   uint16_t queue_id,
>> -- 
>> 2.30.2
>>



[dpdk-dev] [PATCH v5 1/2] ethdev: fix docs of functions getting xstats by IDs

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Document valid combinations of input arguments in accordance with
current implementation in ethdev.

Fixes: 79c913a42f0 ("ethdev: retrieve xstats by ID")
Cc: sta...@dpdk.org

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
Reviewed-by: Andy Moreton 
---
 lib/ethdev/rte_ethdev.h | 35 +++
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 224c6c980a..f597171c97 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -2899,21 +2899,23 @@ int rte_eth_xstats_get(uint16_t port_id, struct 
rte_eth_xstat *xstats,
  * @param port_id
  *   The port identifier of the Ethernet device.
  * @param xstats_names
- *   An rte_eth_xstat_name array of at least *size* elements to
- *   be filled. If set to NULL, the function returns the required number
- *   of elements.
- * @param ids
- *   IDs array given by app to retrieve specific statistics
+ *   Array to be filled in with names of requested device statistics.
+ *   Must not be NULL if @p ids are specified (not NULL).
  * @param size
- *   The size of the xstats_names array (number of elements).
+ *   Number of elements in @p xstats_names array (if not NULL) and in
+ *   @p ids array (if not NULL). Must be 0 if both array pointers are NULL.
+ * @param ids
+ *   IDs array given by app to retrieve specific statistics. May be NULL to
+ *   retrieve names of all available statistics or, if @p xstats_names is
+ *   NULL as well, just the number of available statistics.
  * @return
  *   - A positive value lower or equal to size: success. The return value
  * is the number of entries filled in the stats table.
- *   - A positive value higher than size: error, the given statistics table
+ *   - A positive value higher than size: success. The given statistics table
  * is too small. The return value corresponds to the size that should
  * be given to succeed. The entries in the table are not valid and
  * shall not be used by the caller.
- *   - A negative value on error (invalid port id).
+ *   - A negative value on error.
  */
 int
 rte_eth_xstats_get_names_by_id(uint16_t port_id,
@@ -2926,22 +2928,23 @@ rte_eth_xstats_get_names_by_id(uint16_t port_id,
  * @param port_id
  *   The port identifier of the Ethernet device.
  * @param ids
- *   A pointer to an ids array passed by application. This tells which
- *   statistics values function should retrieve. This parameter
- *   can be set to NULL if size is 0. In this case function will retrieve
- *   all available statistics.
+ *   IDs array given by app to retrieve specific statistics. May be NULL to
+ *   retrieve all available statistics or, if @p values is NULL as well,
+ *   just the number of available statistics.
  * @param values
- *   A pointer to a table to be filled with device statistics values.
+ *   Array to be filled in with requested device statistics.
+ *   Must not be NULL if ids are specified (not NULL).
  * @param size
- *   The size of the ids array (number of elements).
+ *   Number of elements in @p values array (if not NULL) and in @p ids
+ *   array (if not NULL). Must be 0 if both array pointers are NULL.
  * @return
  *   - A positive value lower or equal to size: success. The return value
  * is the number of entries filled in the stats table.
- *   - A positive value higher than size: error, the given statistics table
- *   - A positive value higher than size: success. The given statistics table
  * is too small. The return value corresponds to the size that should
  * be given to succeed. The entries in the table are not valid and
  * shall not be used by the caller.
- *   - A negative value on error (invalid port id).
+ *   - A negative value on error.
  */
 int rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids,
 uint64_t *values, unsigned int size);
-- 
2.30.2



[dpdk-dev] [PATCH v5 2/2] ethdev: fix docs of drivers callbacks getting xstats by IDs

2021-09-28 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Update xstats by IDs callbacks documentation in accordance with
ethdev usage of these callbacks. Document valid combinations of
input arguments to make driver implementation simpler.

Fixes: 79c913a42f0 ("ethdev: retrieve xstats by ID")
Cc: sta...@dpdk.org

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
Reviewed-by: Andy Moreton 
---
 lib/ethdev/ethdev_driver.h | 42 --
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 40e474aa7e..c89eefcc42 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -187,11 +187,28 @@ typedef int (*eth_xstats_get_t)(struct rte_eth_dev *dev,
struct rte_eth_xstat *stats, unsigned int n);
 /**< @internal Get extended stats of an Ethernet device. */
 
+/**
+ * @internal
+ * Get extended stats of an Ethernet device.
+ *
+ * @param dev
+ *   ethdev handle of port.
+ * @param ids
+ *   IDs array to retrieve specific statistics. Must not be NULL.
+ * @param values
+ *   A pointer to a table to be filled with device statistics values.
+ *   Must not be NULL.
+ * @param n
+ *   Element count in @p ids and @p values.
+ *
+ * @return
+ *   - A number of filled in stats.
+ *   - A negative value on error.
+ */
 typedef int (*eth_xstats_get_by_id_t)(struct rte_eth_dev *dev,
  const uint64_t *ids,
  uint64_t *values,
  unsigned int n);
-/**< @internal Get extended stats of an Ethernet device. */
 
 /**
  * @internal
@@ -218,10 +235,31 @@ typedef int (*eth_xstats_get_names_t)(struct rte_eth_dev 
*dev,
struct rte_eth_xstat_name *xstats_names, unsigned int size);
 /**< @internal Get names of extended stats of an Ethernet device. */
 
+/**
+ * @internal
+ * Get names of extended stats of an Ethernet device.
+ * For name count, set @p xstats_names and @p ids to NULL.
+ *
+ * @param dev
+ *   ethdev handle of port.
+ * @param xstats_names
+ *   An rte_eth_xstat_name array of at least *size* elements to
+ *   be filled. Can be NULL together with @p ids to retrieve number of
+ *   available statistics.
+ * @param ids
+ *   IDs array to retrieve specific statistics. Can be NULL together
+ *   with @p xstats_names to retrieve number of available statistics.
+ * @param size
+ *   Element count in @p ids and @p xstats_names.
+ *
+ * @return
+ *   - A number of filled in stats if both xstats_names and ids are not NULL.
+ *   - A number of available stats if both xstats_names and ids are NULL.
+ *   - A negative value on error.
+ */
 typedef int (*eth_xstats_get_names_by_id_t)(struct rte_eth_dev *dev,
struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
unsigned int size);
-/**< @internal Get names of extended stats of an Ethernet device. */
 
 typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev,
 uint16_t queue_id,
-- 
2.30.2



Re: [dpdk-dev] [PATCH v2] net/ixgbe: fix RxQ/TxQ release

2021-09-28 Thread Wang, Haiyue
> -Original Message-
> From: Julien Meunier 
> Sent: Tuesday, September 28, 2021 16:13
> To: dev@dpdk.org
> Cc: sta...@dpdk.org; Richardson, Bruce ; Wang, 
> Haiyue
> 
> Subject: [PATCH v2] net/ixgbe: fix RxQ/TxQ release
> 
> On the vector implementation, during the tear-down, the mbufs not
> drained in the RxQ and TxQ are freed based on an algorithm which
> supposed that the number of descriptors is a power of 2 (max_desc).
> Based on this hypothesis, this algorithm uses a bitmask in order to
> detect an index overflow during the iteration, and to restart the loop
> from 0.
> 
> However, there is no such power of 2 requirement in the ixgbe for the
> number of descriptors in the RxQ / TxQ. The only requirement is to have
> a number correctly aligned.
> 
> If a user requested to configure a number of descriptors which is not a
> power of 2, as a consequence, during the tear-down, it was possible to
> be in an infinite loop, and to never reach the exit loop condition.
> 
> By removing the bitmask and changing the loop method, we can avoid this
> issue, and allow the user to configure a RxQ / TxQ which is not a power
> of 2.
> 
> Fixes: c95584dc2b18 ("ixgbe: new vectorized functions for Rx/Tx")
> Cc: bruce.richard...@intel.com
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Julien Meunier 
> ---
>  drivers/net/ixgbe/ixgbe_rxtx_vec_common.h | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 

Acked-by: Haiyue Wang 

> --
> 2.17.1



Re: [dpdk-dev] [PATCH v3 00/20] ice/base: add parser module

2021-09-28 Thread Ferruh Yigit
On 9/21/2021 2:32 PM, Zhang, Qi Z wrote:
> 
> 
>> -Original Message-
>> From: Zhang, Qi Z 
>> Sent: Tuesday, September 21, 2021 9:20 PM
>> To: Yang, Qiming 
>> Cc: Guo, Junfeng ; dev@dpdk.org; Zhang, Qi Z
>> 
>> Subject: [PATCH v3 00/20] ice/base: add parser module
>>
>> Add the parser module that can parse on a raw packet then figure out the
>> low-level metadata to program the hardware packet process pipeline for flow
>> offloading(Switch/FDIR/RSS). This is the pre-step to enable a 
>> protocol-agnostic
>> flow offloading solution for ice devices that leverage Intel DDP technology.
>>
>> -v3:
>> 1. fix 32 bit compile issue in patch 2/20
>>
>> -v2:
>> 1. use inclusive word in patch 2/20
>> 2. replace magic number with macro in patch 17/20 3. fix couple typos
>>
>> Qi Zhang (20):
>>   net/ice/base: add parser create and destroy skeleton
>>   net/ice/base: init imem table for parser
>>   net/ice/base: init metainit table for parser
>>   net/ice/base: init parse graph cam table for parser
>>   net/ice/base: init boost TCAM table for parser
>>   net/ice/base: init ptype marker TCAM table for parser
>>   net/ice/base: init marker group table for parser
>>   net/ice/base: init protocol group table for parser
>>   net/ice/base: init flag redirect table for parser
>>   net/ice/base: init XLT key builder for parser
>>   net/ice/base: add parser runtime skeleton
>>   net/ice/base: add helper function for boost TCAM match
>>   net/ice/base: add helper functions for parse graph key matching
>>   net/ice/base: add helper function for ptype markers match
>>   net/ice/base: add helper function to redirect flags
>>   net/ice/base: add helper function to aggregate flags
>>   net/ice/base: add parser execution main loop
>>   net/ice/base: support double VLAN mode configure for parser
>>   net/ice/base: add tunnel port support for parser
>>   net/ice/base: add API for parser profile initialization
>>
>>  drivers/net/ice/base/ice_bst_tcam.c| 291 +
>>  drivers/net/ice/base/ice_bst_tcam.h|  35 +
>>  drivers/net/ice/base/ice_common.h  |   1 +
>>  drivers/net/ice/base/ice_flex_pipe.c   |   4 +-
>>  drivers/net/ice/base/ice_flex_pipe.h   |   8 +
>>  drivers/net/ice/base/ice_flex_type.h   |   2 +
>>  drivers/net/ice/base/ice_flg_rd.c  |  76 +++
>>  drivers/net/ice/base/ice_flg_rd.h  |  17 +
>>  drivers/net/ice/base/ice_imem.c| 244 +++
>>  drivers/net/ice/base/ice_imem.h| 109 
>>  drivers/net/ice/base/ice_metainit.c| 143 
>>  drivers/net/ice/base/ice_metainit.h|  46 ++
>>  drivers/net/ice/base/ice_mk_grp.c  |  55 ++
>>  drivers/net/ice/base/ice_mk_grp.h  |  15 +
>>  drivers/net/ice/base/ice_parser.c  | 556 
>>  drivers/net/ice/base/ice_parser.h  | 113 
>>  drivers/net/ice/base/ice_parser_rt.c   | 870 +
>>  drivers/net/ice/base/ice_parser_rt.h   |  53 ++
>>  drivers/net/ice/base/ice_parser_util.h |  36 +
>>  drivers/net/ice/base/ice_pg_cam.c  | 374 +++
>>  drivers/net/ice/base/ice_pg_cam.h  |  74 +++
>>  drivers/net/ice/base/ice_proto_grp.c   | 108 +++
>>  drivers/net/ice/base/ice_proto_grp.h   |  23 +
>>  drivers/net/ice/base/ice_ptype_mk.c|  76 +++
>>  drivers/net/ice/base/ice_ptype_mk.h|  21 +
>>  drivers/net/ice/base/ice_tmatch.h  |  44 ++
>>  drivers/net/ice/base/ice_type.h|   1 +
>>  drivers/net/ice/base/ice_xlt_kb.c  | 216 ++
>>  drivers/net/ice/base/ice_xlt_kb.h  |  34 +
>>  drivers/net/ice/base/meson.build   |  11 +
>>  30 files changed, 3654 insertions(+), 2 deletions(-)  create mode 100644
>> drivers/net/ice/base/ice_bst_tcam.c
>>  create mode 100644 drivers/net/ice/base/ice_bst_tcam.h
>>  create mode 100644 drivers/net/ice/base/ice_flg_rd.c  create mode 100644
>> drivers/net/ice/base/ice_flg_rd.h  create mode 100644
>> drivers/net/ice/base/ice_imem.c  create mode 100644
>> drivers/net/ice/base/ice_imem.h  create mode 100644
>> drivers/net/ice/base/ice_metainit.c
>>  create mode 100644 drivers/net/ice/base/ice_metainit.h
>>  create mode 100644 drivers/net/ice/base/ice_mk_grp.c  create mode
>> 100644 drivers/net/ice/base/ice_mk_grp.h  create mode 100644
>> drivers/net/ice/base/ice_parser.c  create mode 100644
>> drivers/net/ice/base/ice_parser.h  create mode 100644
>> drivers/net/ice/base/ice_parser_rt.c
>>  create mode 100644 drivers/net/ice/base/ice_parser_rt.h
>>  create mode 100644 drivers/net/ice/base/ice_parser_util.h
>>  create mode 100644 drivers/net/ice/base/ice_pg_cam.c  create mode
>> 100644 drivers/net/ice/base/ice_pg_cam.h  create mode 100644
>> drivers/net/ice/base/ice_proto_grp.c
>>  create mode 100644 drivers/net/ice/base/ice_proto_grp.h
>>  create mode 100644 drivers/net/ice/base/ice_ptype_mk.c
>>  create mode 100644 drivers/net/ice/base/ice_ptype_mk.h
>>  create mode 100644 drivers/net/ice/base/ice_tmatch.h  create mode
>> 100644 drivers/net/ice/base/ice_xlt_kb.c  create mode 100644
>> drivers/net/ice/base/ice_xlt_kb.h
>>
>>

  1   2   3   >