[PATCH] maintainers: update email address

2023-10-20 Thread Chenbo Xia
I left Intel and joined Nvidia, so update my email address.

Signed-off-by: Chenbo Xia 
---
 .mailmap|  2 +-
 MAINTAINERS | 12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.mailmap b/.mailmap
index 3f5bab26a8..2ff31b008f 100644
--- a/.mailmap
+++ b/.mailmap
@@ -213,7 +213,7 @@ Charles Brett 
 Charles Myers 
 Charles Stoll 
 Chas Williams <3ch...@gmail.com>  
-Chenbo Xia 
+Chenbo Xia 
 Chengchang Tang  
 Chengfeng Ye 
 Chenghu Yao 
diff --git a/MAINTAINERS b/MAINTAINERS
index 4083658697..b1c9495a00 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -50,7 +50,7 @@ T: git://dpdk.org/next/dpdk-next-net-mlx
 
 Next-virtio Tree
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 
 Next-crypto Tree
@@ -594,7 +594,7 @@ F: drivers/bus/dpaa/
 F: drivers/bus/fslmc/
 
 PCI bus driver
-M: Chenbo Xia 
+M: Chenbo Xia 
 M: Nipun Gupta 
 F: drivers/bus/pci/
 
@@ -983,7 +983,7 @@ F: doc/guides/nics/features/vmxnet3.ini
 
 Vhost-user
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 F: lib/vhost/
 F: doc/guides/prog_guide/vhost_lib.rst
@@ -997,7 +997,7 @@ F: doc/guides/sample_app_ug/vdpa.rst
 
 Vhost PMD
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 F: drivers/net/vhost/
 F: doc/guides/nics/vhost.rst
@@ -1005,7 +1005,7 @@ F: doc/guides/nics/features/vhost.ini
 
 Virtio PMD
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 F: drivers/net/virtio/
 F: doc/guides/nics/virtio.rst
@@ -1661,7 +1661,7 @@ F: app/test/test_rcu*
 F: doc/guides/prog_guide/rcu_lib.rst
 
 PCI
-M: Chenbo Xia 
+M: Chenbo Xia 
 M: Gaetan Rivet 
 F: lib/pci/
 
-- 
2.39.3 (Apple Git-145)



[PATCH v2] maintainers: update email address

2023-10-22 Thread Chenbo Xia
I left Intel and joined Nvidia, so update my email address.

Signed-off-by: Chenbo Xia 
Acked-by: Maxime Coquelin 
---
 .mailmap|  2 +-
 MAINTAINERS | 12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.mailmap b/.mailmap
index 3f5bab26a8..d40b3ad6c0 100644
--- a/.mailmap
+++ b/.mailmap
@@ -213,7 +213,7 @@ Charles Brett 
 Charles Myers 
 Charles Stoll 
 Chas Williams <3ch...@gmail.com>  
-Chenbo Xia 
+Chenbo Xia  
 Chengchang Tang  
 Chengfeng Ye 
 Chenghu Yao 
diff --git a/MAINTAINERS b/MAINTAINERS
index 4083658697..b1c9495a00 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -50,7 +50,7 @@ T: git://dpdk.org/next/dpdk-next-net-mlx
 
 Next-virtio Tree
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 
 Next-crypto Tree
@@ -594,7 +594,7 @@ F: drivers/bus/dpaa/
 F: drivers/bus/fslmc/
 
 PCI bus driver
-M: Chenbo Xia 
+M: Chenbo Xia 
 M: Nipun Gupta 
 F: drivers/bus/pci/
 
@@ -983,7 +983,7 @@ F: doc/guides/nics/features/vmxnet3.ini
 
 Vhost-user
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 F: lib/vhost/
 F: doc/guides/prog_guide/vhost_lib.rst
@@ -997,7 +997,7 @@ F: doc/guides/sample_app_ug/vdpa.rst
 
 Vhost PMD
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 F: drivers/net/vhost/
 F: doc/guides/nics/vhost.rst
@@ -1005,7 +1005,7 @@ F: doc/guides/nics/features/vhost.ini
 
 Virtio PMD
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 F: drivers/net/virtio/
 F: doc/guides/nics/virtio.rst
@@ -1661,7 +1661,7 @@ F: app/test/test_rcu*
 F: doc/guides/prog_guide/rcu_lib.rst
 
 PCI
-M: Chenbo Xia 
+M: Chenbo Xia 
 M: Gaetan Rivet 
 F: lib/pci/
 
-- 
2.39.3 (Apple Git-145)



[PATCH] maintainers: update email address

2023-09-18 Thread Chenbo Xia
I am leaving Intel, so replace my Intel email with my personal one
temporarily.

Signed-off-by: Chenbo Xia 
---
 .mailmap|  2 +-
 MAINTAINERS | 12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.mailmap b/.mailmap
index 864d33ee46..8cb3c1e80f 100644
--- a/.mailmap
+++ b/.mailmap
@@ -209,7 +209,7 @@ Charles Brett 
 Charles Myers 
 Charles Stoll 
 Chas Williams <3ch...@gmail.com>  
-Chenbo Xia 
+Chenbo Xia 
 Chengchang Tang  
 Chengfeng Ye 
 Chenghu Yao 
diff --git a/MAINTAINERS b/MAINTAINERS
index 698608cdb2..ef0f52f1ef 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -50,7 +50,7 @@ T: git://dpdk.org/next/dpdk-next-net-mlx
 
 Next-virtio Tree
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 
 Next-crypto Tree
@@ -590,7 +590,7 @@ F: drivers/bus/dpaa/
 F: drivers/bus/fslmc/
 
 PCI bus driver
-M: Chenbo Xia 
+M: Chenbo Xia 
 M: Nipun Gupta 
 F: drivers/bus/pci/
 
@@ -980,7 +980,7 @@ F: doc/guides/nics/features/vmxnet3.ini
 
 Vhost-user
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 F: lib/vhost/
 F: doc/guides/prog_guide/vhost_lib.rst
@@ -994,7 +994,7 @@ F: doc/guides/sample_app_ug/vdpa.rst
 
 Vhost PMD
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 F: drivers/net/vhost/
 F: doc/guides/nics/vhost.rst
@@ -1002,7 +1002,7 @@ F: doc/guides/nics/features/vhost.ini
 
 Virtio PMD
 M: Maxime Coquelin 
-M: Chenbo Xia 
+M: Chenbo Xia 
 T: git://dpdk.org/next/dpdk-next-virtio
 F: drivers/net/virtio/
 F: doc/guides/nics/virtio.rst
@@ -1655,7 +1655,7 @@ F: app/test/test_rcu*
 F: doc/guides/prog_guide/rcu_lib.rst
 
 PCI
-M: Chenbo Xia 
+M: Chenbo Xia 
 M: Gaetan Rivet 
 F: lib/pci/
 
-- 
2.25.1



Re: [PATCH v6 2/2] bus/pci: fix secondary process save 'FD' problem

2024-07-14 Thread Chenbo Xia
Hi David,

> On Jul 13, 2024, at 01:30, David Marchand  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> Hello,
> 
> On Tue, Jul 2, 2024 at 9:40 AM Chaoyong He  wrote:
>> 
>> From: Zerun Fu 
>> 
>> In the previous logic the 'fd' was only saved in the primary process,
>> but for some devices this value is also used in the secondary logic.
>> 
>> For example, the call of 'rte_pci_find_ext_capability()' will fail in
>> the secondary process.
>> 
>> Fix this problem by getting and saving the value of 'fd' also in the
>> secondary process logic.
>> 
>> Fixes: 9b957f378abf ("pci: merge uio functions for linux and bsd")
>> Cc: muk...@igel.co.jp
>> Cc: sta...@dpdk.org
>> 
>> Signed-off-by: Zerun Fu 
>> Reviewed-by: Chaoyong He 
>> Reviewed-by: Long Wu 
>> Reviewed-by: Peng Zhang 
>> Acked-by: Anatoly Burakov 
> 
> Chenbo,
> Are you ok with this fix?

Sorry, I was interrupted while reviewing this and later forgot about it.

For this patch:

Reviewed-by: Chenbo Xia 

> 
> Thanks.
> 
> --
> David Marchand




Re: [PATCH v5] virtio: optimize stats counters performance

2024-08-06 Thread Chenbo Xia


> On Aug 2, 2024, at 00:03, Morten Brørup  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> Optimized the performance of updating the virtio statistics counters by
> reducing the number of branches.
> 
> Ordered the packet size comparisons according to the probability with
> typical internet traffic mix.
> 
> Signed-off-by: Morten Brørup 
> ---
> v5:
> * Do not inline the function. (Stephen)
> v4:
> * Consider multicast/broadcast packets unlikely.
> v3:
> * Eliminated a local variable.
> * Note: Substituted sizeof(uint32_t)*4 by 32UL, using unsigned long type
>  to keep optimal offsetting in generated assembler output.
> * Removed unnecessary curly braces.
> v2:
> * Fixed checkpatch warning about line length.
> ---
> drivers/net/virtio/virtio_rxtx.c | 39 
> drivers/net/virtio/virtio_rxtx.h |  4 ++--
> 2 files changed, 16 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_rxtx.c 
> b/drivers/net/virtio/virtio_rxtx.c
> index f69b9453a2..b67f063b31 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -82,37 +82,26 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t 
> desc_idx)
> }
> 
> void
> -virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf 
> *mbuf)
> +virtio_update_packet_stats(struct virtnet_stats *const stats,
> +   const struct rte_mbuf *const mbuf)
> {
>uint32_t s = mbuf->pkt_len;
> -   struct rte_ether_addr *ea;
> +   const struct rte_ether_addr *const ea =
> +   rte_pktmbuf_mtod(mbuf, const struct rte_ether_addr *);
> 
>stats->bytes += s;
> 
> -   if (s == 64) {
> -   stats->size_bins[1]++;
> -   } else if (s > 64 && s < 1024) {
> -   uint32_t bin;
> -
> -   /* count zeros, and offset into correct bin */
> -   bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
> -   stats->size_bins[bin]++;
> -   } else {
> -   if (s < 64)
> -   stats->size_bins[0]++;
> -   else if (s < 1519)
> -   stats->size_bins[6]++;
> -   else
> -   stats->size_bins[7]++;
> -   }
> +   if (s >= 1024)
> +   stats->size_bins[6 + (s > 1518)]++;
> +   else if (s <= 64)
> +   stats->size_bins[s >> 6]++;
> +   else
> +   stats->size_bins[32UL - rte_clz32(s) - 5]++;
> 
> -   ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
> -   if (rte_is_multicast_ether_addr(ea)) {
> -   if (rte_is_broadcast_ether_addr(ea))
> -   stats->broadcast++;
> -   else
> -   stats->multicast++;
> -   }
> +   RTE_BUILD_BUG_ON(offsetof(struct virtnet_stats, broadcast) !=
> +   offsetof(struct virtnet_stats, multicast) + 
> sizeof(uint64_t));
> +   if (unlikely(rte_is_multicast_ether_addr(ea)))
> +   (&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
> }
> 
> static inline void
> diff --git a/drivers/net/virtio/virtio_rxtx.h 
> b/drivers/net/virtio/virtio_rxtx.h
> index afc4b74534..68034c914b 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -35,7 +35,7 @@ struct virtnet_tx {
> };
> 
> int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
> -void virtio_update_packet_stats(struct virtnet_stats *stats,
> -   struct rte_mbuf *mbuf);
> +void virtio_update_packet_stats(struct virtnet_stats *const stats,
> +   const struct rte_mbuf *const mbuf);
> 
> #endif /* _VIRTIO_RXTX_H_ */
> --
> 2.43.0
> 

Reviewed-by: Chenbo Xia 

Re: [PATCH] vhost-user: optimize stats counters performance

2024-08-06 Thread Chenbo Xia


> On Aug 2, 2024, at 22:32, Morten Brørup  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> Optimized the performance of updating the statistics counters by reducing
> the number of branches.
> 
> Ordered the packet size comparisons according to the probability with
> typical internet traffic mix.
> 
> Signed-off-by: Morten Brørup 
> ---
> lib/vhost/virtio_net.c | 40 ++--
> 1 file changed, 14 insertions(+), 26 deletions(-)
> 
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index 370402d849..25a495df56 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -53,7 +53,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t 
> nr_vring)
> }
> 
> static inline void
> -vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +vhost_queue_stats_update(const struct virtio_net *dev, struct 
> vhost_virtqueue *vq,
>struct rte_mbuf **pkts, uint16_t count)
>__rte_shared_locks_required(&vq->access_lock)
> {
> @@ -64,37 +64,25 @@ vhost_queue_stats_update(struct virtio_net *dev, struct 
> vhost_virtqueue *vq,
>return;
> 
>for (i = 0; i < count; i++) {
> -   struct rte_ether_addr *ea;
> -   struct rte_mbuf *pkt = pkts[i];
> +   const struct rte_ether_addr *ea;
> +   const struct rte_mbuf *pkt = pkts[i];
>uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt);
> 
>stats->packets++;
>stats->bytes += pkt_len;
> 
> -   if (pkt_len == 64) {
> -   stats->size_bins[1]++;
> -   } else if (pkt_len > 64 && pkt_len < 1024) {
> -   uint32_t bin;
> -
> -   /* count zeros, and offset into correct bin */
> -   bin = (sizeof(pkt_len) * 8) - rte_clz32(pkt_len) - 5;
> -   stats->size_bins[bin]++;
> -   } else {
> -   if (pkt_len < 64)
> -   stats->size_bins[0]++;
> -   else if (pkt_len < 1519)
> -   stats->size_bins[6]++;
> -   else
> -   stats->size_bins[7]++;
> -   }
> +   if (pkt_len >= 1024)
> +   stats->size_bins[6 + (pkt_len > 1518)]++;
> +   else if (pkt_len <= 64)
> +   stats->size_bins[pkt_len >> 6]++;
> +   else
> +   stats->size_bins[32UL - rte_clz32(pkt_len) - 5]++;
> 
> -   ea = rte_pktmbuf_mtod(pkt, struct rte_ether_addr *);
> -   if (rte_is_multicast_ether_addr(ea)) {
> -   if (rte_is_broadcast_ether_addr(ea))
> -   stats->broadcast++;
> -   else
> -   stats->multicast++;
> -   }
> +   ea = rte_pktmbuf_mtod(pkt, const struct rte_ether_addr *);
> +   RTE_BUILD_BUG_ON(offsetof(struct virtqueue_stats, broadcast) 
> !=
> +   offsetof(struct virtqueue_stats, multicast) + 
> sizeof(uint64_t));
> +   if (unlikely(rte_is_multicast_ether_addr(ea)))
> +   
> (&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
>}
> }
> 
> --
> 2.43.0
> 

Reviewed-by: Chenbo Xia 



Re: [PATCH] bus/pci: don't open uio device in secondary process

2024-08-29 Thread Chenbo Xia


> On Aug 28, 2024, at 18:40, Konrad Sztyber  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> The uio_pci_generic driver clears the bus master bit when the device
> file is closed.  So, when the secondary process terminates after probing

Should be one space before 'So'

> a device, that device becomes unusable in the primary process.
> 
> To avoid that, the device file is now opened only in the primary
> process.  The commit that introduced this regression, 847d78fb95
> ("bus/pci: fix FD in secondary process"), only mentioned enabling access
> to config space from secondary process, which still works, as it doesn't
> rely on the device file.
> 
> Fixes: 847d78fb95 ("bus/pci: fix FD in secondary process")

Besides the Cc stable tag mentioned by Chaoyong, the commit ID should be 12 digits long.

Please also fix the coding style:

WARNING:BLOCK_COMMENT_STYLE: Block comments use a trailing */ on a separate line
#176: FILE: drivers/bus/pci/linux/pci_uio.c:265:
+ * closed, so open it only in the primary process */

With above fixed:

Reviewed-by: Chenbo Xia 

> 
> Signed-off-by: Konrad Sztyber 
> ---
> drivers/bus/pci/linux/pci_uio.c | 25 +
> 1 file changed, 13 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
> index 4c1d3327a9..432316afcc 100644
> --- a/drivers/bus/pci/linux/pci_uio.c
> +++ b/drivers/bus/pci/linux/pci_uio.c
> @@ -232,18 +232,6 @@ pci_uio_alloc_resource(struct rte_pci_device *dev,
>loc->domain, loc->bus, loc->devid, loc->function);
>return 1;
>}
> -   snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
> -
> -   /* save fd */
> -   fd = open(devname, O_RDWR);
> -   if (fd < 0) {
> -   PCI_LOG(ERR, "Cannot open %s: %s", devname, strerror(errno));
> -   goto error;
> -   }
> -
> -   if (rte_intr_fd_set(dev->intr_handle, fd))
> -   goto error;
> -
>snprintf(cfgname, sizeof(cfgname),
>"/sys/class/uio/uio%u/device/config", uio_num);
> 
> @@ -273,6 +261,19 @@ pci_uio_alloc_resource(struct rte_pci_device *dev,
>if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>return 0;
> 
> +   /* the uio_pci_generic driver clears the bus master enable bit when 
> the device file is
> +* closed, so open it only in the primary process */
> +   snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
> +   /* save fd */
> +   fd = open(devname, O_RDWR);
> +   if (fd < 0) {
> +   PCI_LOG(ERR, "Cannot open %s: %s", devname, strerror(errno));
> +   goto error;
> +   }
> +
> +   if (rte_intr_fd_set(dev->intr_handle, fd))
> +   goto error;
> +
>/* allocate the mapping details for secondary processes*/
>*uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
>if (*uio_res == NULL) {
> --
> 2.45.0
> 



Re: [PATCH v2] bus/pci: don't open uio device in secondary process

2024-08-29 Thread Chenbo Xia


> On Aug 29, 2024, at 16:57, Konrad Sztyber  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> The uio_pci_generic driver clears the bus master bit when the device
> file is closed. So, when the secondary process terminates after probing
> a device, that device becomes unusable in the primary process.
> 
> To avoid that, the device file is now opened only in the primary
> process. The commit that introduced this regression, 847d78fb9530
> ("bus/pci: fix FD in secondary process"), only mentioned enabling access
> to config space from secondary process, which still works, as it doesn't
> rely on the device file.
> 
> Fixes: 847d78fb9530 ("bus/pci: fix FD in secondary process")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Konrad Sztyber 
> ---
> drivers/bus/pci/linux/pci_uio.c | 27 +++
> 1 file changed, 15 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
> index 4c1d3327a9..5c4ba8098c 100644
> --- a/drivers/bus/pci/linux/pci_uio.c
> +++ b/drivers/bus/pci/linux/pci_uio.c
> @@ -232,18 +232,6 @@ pci_uio_alloc_resource(struct rte_pci_device *dev,
>   loc->domain, loc->bus, loc->devid, loc->function);
>   return 1;
>   }
> -   snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
> -
> -   /* save fd */
> -   fd = open(devname, O_RDWR);
> -   if (fd < 0) {
> -   PCI_LOG(ERR, "Cannot open %s: %s", devname, strerror(errno));
> -   goto error;
> -   }
> -
> -   if (rte_intr_fd_set(dev->intr_handle, fd))
> -   goto error;
> -
>   snprintf(cfgname, sizeof(cfgname),
>   "/sys/class/uio/uio%u/device/config", uio_num);
> 
> @@ -273,6 +261,21 @@ pci_uio_alloc_resource(struct rte_pci_device *dev,
>   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>   return 0;
> 
> +   /*
> +* the uio_pci_generic driver clears the bus master enable bit when 
> the device file is
> +* closed, so open it only in the primary process
> +*/
> +   snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
> +   /* save fd */
> +   fd = open(devname, O_RDWR);
> +   if (fd < 0) {
> +   PCI_LOG(ERR, "Cannot open %s: %s", devname, strerror(errno));
> +   goto error;
> +   }
> +
> +   if (rte_intr_fd_set(dev->intr_handle, fd))
> +   goto error;
> +
>   /* allocate the mapping details for secondary processes*/
>   *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
>   if (*uio_res == NULL) {
> --
> 2.45.0
> 

Reviewed-by: Chenbo Xia 



[dpdk-dev] [PATCH] doc: remove deprecation notice for vhost

2021-11-02 Thread Chenbo Xia
Ten vhost APIs were announced as stable and promoted in the commit
below, so remove the related deprecation notice.

Fixes: 945ef8a04098 ("vhost: promote some APIs to stable")

Signed-off-by: Chenbo Xia 
Reported-by: Maxime Coquelin 
---
 doc/guides/rel_notes/deprecation.rst | 8 
 1 file changed, 8 deletions(-)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 4366015b01..4f7e95f05f 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -114,14 +114,6 @@ Deprecation Notices
 * vhost: rename ``struct vhost_device_ops`` to ``struct rte_vhost_device_ops``
   in DPDK v21.11.
 
-* vhost: The experimental tags of ``rte_vhost_driver_get_protocol_features``,
-  ``rte_vhost_driver_get_queue_num``, ``rte_vhost_crypto_create``,
-  ``rte_vhost_crypto_free``, ``rte_vhost_crypto_fetch_requests``,
-  ``rte_vhost_crypto_finalize_requests``, ``rte_vhost_crypto_set_zero_copy``,
-  ``rte_vhost_va_from_guest_pa``, ``rte_vhost_extern_callback_register``,
-  and ``rte_vhost_driver_set_protocol_features`` functions will be removed
-  and the API functions will be made stable in DPDK 21.11.
-
 * cryptodev: Hide structures ``rte_cryptodev_sym_session`` and
   ``rte_cryptodev_asym_session`` to remove unnecessary indirection between
   session and the private data of session. An opaque pointer can be exposed
-- 
2.17.1



[dpdk-dev] [PATCH] examples/vhost: fix port init failure in mergeable mode

2021-11-03 Thread Chenbo Xia
When the example starts in mergeable mode with an i40e port,
it fails to launch because the example uses the default MTU, MAX_MTU,
to configure the ethdev. The root cause is that some devices have Ethernet
frame overhead, so MAX_MTU is larger than the device's max
MTU and the ethdev configuration fails.

This patch checks the device's max mtu before setting the ethdev
configuration. If the device has a max mtu, use that value to
configure.

Fixes: 1bb4a528c41f ("ethdev: fix max Rx packet length")

Signed-off-by: Chenbo Xia 
Reported-by: Xingguang He 
---
 examples/vhost/main.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 58e12aa710..09fb2382bf 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -273,6 +273,13 @@ port_init(uint16_t port)
 
tx_rings = (uint16_t)rte_lcore_count();
 
+   if (mergeable) {
+   if (dev_info.max_mtu != UINT16_MAX && dev_info.max_rx_pktlen > 
dev_info.max_mtu)
+   vmdq_conf_default.rxmode.mtu = dev_info.max_mtu;
+   else
+   vmdq_conf_default.rxmode.mtu = MAX_MTU;
+   }
+
/* Get port configuration. */
retval = get_eth_conf(&port_conf, num_devices);
if (retval < 0)
@@ -631,8 +638,6 @@ us_vhost_parse_args(int argc, char **argv)
return -1;
}
mergeable = !!ret;
-   if (ret)
-   vmdq_conf_default.rxmode.mtu = MAX_MTU;
break;
 
case OPT_STATS_NUM:
-- 
2.17.1



[dpdk-dev] [PATCH 0/8] Removal of PCI bus ABIs

2021-09-09 Thread Chenbo Xia
As announced in the deprecation notice, most ABIs in PCI bus will be removed.

As there exist some applications that want to access PCI memory resource,
two new APIs are defined in Patch 1 and corresponding changes are applied
to testpmd in Patch 2.

Patch 3-4 clean up the unnecessary usage of PCI bus header in examples.

Patch 5-7 clean up the unused PCI related structure in kni library and related
tests and examples.

Patch 8 finally removes most of ABIs in PCI bus.

Chenbo Xia (8):
  bus/pci: add new memory resource access APIs
  app/testpmd: use PCI memory resource access APIs
  examples/ethtool: use PCI library API to get PCI address
  examples/kni: remove unused PCI bus header
  test/kni: remove setting of PCI ID and address
  examples/ip_pipeline: remove setting of PCI ID and address
  kni: replace unused variable definition with reserved bytes
  bus/pci: remove ABIs in PCI bus

 app/test-pmd/config.c |  38 +-
 app/test-pmd/testpmd.h|  46 +-
 app/test/test_kni.c   |  57 ---
 app/test/virtual_pmd.c|   2 +-
 doc/guides/rel_notes/release_21_11.rst|   7 +
 drivers/baseband/acc100/rte_acc100_pmd.c  |   2 +-
 .../fpga_5gnr_fec/rte_fpga_5gnr_fec.c |   2 +-
 drivers/baseband/fpga_lte_fec/fpga_lte_fec.c  |   2 +-
 drivers/bus/pci/bsd/pci.c |   1 -
 drivers/bus/pci/linux/pci.c   |   1 -
 drivers/bus/pci/linux/pci_uio.c   |   1 -
 drivers/bus/pci/linux/pci_vfio.c  |   1 -
 drivers/bus/pci/meson.build   |   4 +
 drivers/bus/pci/pci_common.c  |  78 
 drivers/bus/pci/pci_common_uio.c  |   1 -
 drivers/bus/pci/pci_driver.h  | 402 ++
 drivers/bus/pci/pci_params.c  |   1 -
 drivers/bus/pci/private.h |   3 +-
 drivers/bus/pci/rte_bus_pci.h | 387 ++---
 drivers/bus/pci/version.map   |  28 +-
 drivers/common/cnxk/roc_platform.h|   2 +-
 drivers/common/mlx5/linux/mlx5_common_verbs.c |   2 +-
 drivers/common/mlx5/mlx5_common_pci.c |   2 +-
 drivers/common/octeontx2/otx2_dev.h   |   2 +-
 drivers/common/octeontx2/otx2_sec_idev.c  |   2 +-
 drivers/common/qat/qat_device.h   |   2 +-
 drivers/common/qat/qat_qp.c   |   2 +-
 drivers/common/sfc_efx/sfc_efx.h  |   2 +-
 drivers/compress/mlx5/mlx5_compress.c |   2 +-
 drivers/compress/octeontx/otx_zip.h   |   2 +-
 drivers/compress/qat/qat_comp.c   |   2 +-
 drivers/crypto/ccp/ccp_dev.h  |   2 +-
 drivers/crypto/ccp/ccp_pci.h  |   2 +-
 drivers/crypto/ccp/rte_ccp_pmd.c  |   2 +-
 drivers/crypto/cnxk/cn10k_cryptodev.c |   2 +-
 drivers/crypto/cnxk/cn9k_cryptodev.c  |   2 +-
 drivers/crypto/mlx5/mlx5_crypto.c |   2 +-
 drivers/crypto/nitrox/nitrox_device.h |   2 +-
 drivers/crypto/octeontx/otx_cryptodev.c   |   2 +-
 drivers/crypto/octeontx/otx_cryptodev_ops.c   |   2 +-
 drivers/crypto/octeontx2/otx2_cryptodev.c |   2 +-
 drivers/crypto/qat/qat_sym.c  |   2 +-
 drivers/crypto/qat/qat_sym_pmd.c  |   2 +-
 drivers/crypto/virtio/virtio_cryptodev.c  |   2 +-
 drivers/crypto/virtio/virtio_pci.h|   2 +-
 drivers/event/dlb2/pf/dlb2_main.h |   2 +-
 drivers/event/dlb2/pf/dlb2_pf.c   |   2 +-
 drivers/event/octeontx/ssovf_probe.c  |   2 +-
 drivers/event/octeontx/timvf_probe.c  |   2 +-
 drivers/event/octeontx2/otx2_evdev.c  |   2 +-
 drivers/mempool/cnxk/cnxk_mempool.c   |   2 +-
 drivers/mempool/octeontx/octeontx_fpavf.c |   2 +-
 drivers/mempool/octeontx2/otx2_mempool.c  |   2 +-
 drivers/mempool/octeontx2/otx2_mempool.h  |   2 +-
 drivers/mempool/octeontx2/otx2_mempool_irq.c  |   2 +-
 drivers/meson.build   |   4 +
 drivers/net/ark/ark_ethdev.c  |   2 +-
 drivers/net/avp/avp_ethdev.c  |   2 +-
 drivers/net/bnx2x/bnx2x.h |   2 +-
 drivers/net/bnxt/bnxt.h   |   2 +-
 drivers/net/bonding/rte_eth_bond_args.c   |   2 +-
 drivers/net/cxgbe/base/adapter.h  |   2 +-
 drivers/net/cxgbe/cxgbe_ethdev.c  |   2 +-
 drivers/net/e1000/em_ethdev.c |   2 +-
 drivers/net/e1000/em_rxtx.c   |   2 +-
 drivers/net/e1000/igb_ethdev.c|   2 +-
 drivers/net/e1000/igb_pf.c|   2 +-
 drivers/net/ena/ena_ethdev.h  |   2 +-
 drivers/net/enic/base/vnic_dev.h  |   2 +-
 drivers/net/enic/enic_ethdev.c|   2 +-
 drivers/net/enic/enic_main.c  |   2 +-
 drivers/net/enic/enic_vf_representor.c|   2 +-
 drivers/net/hinic/base/hinic_pmd_hwdev.c

[dpdk-dev] [PATCH 1/8] bus/pci: add new memory resource access APIs

2021-09-09 Thread Chenbo Xia
Some applications want to access PCI memory resources. Currently
applications use struct rte_pci_device to access it. Since the
structure will be made internal later, this patch adds two APIs
for memory resource access.

Signed-off-by: Chenbo Xia 
---
 doc/guides/rel_notes/release_21_11.rst |  5 ++
 drivers/bus/pci/pci_common.c   | 78 ++
 drivers/bus/pci/rte_bus_pci.h  | 36 
 drivers/bus/pci/version.map|  4 ++
 4 files changed, 123 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_11.rst 
b/doc/guides/rel_notes/release_21_11.rst
index 675b573834..1c9abb74ec 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -62,6 +62,11 @@ New Features
   * Added bus-level parsing of the devargs syntax.
   * Kept compatibility with the legacy syntax as parsing fallback.
 
+* **Added new memory resource read/write APIs in PCI bus.**
+
+  Added new memory resource read/write APIs ``rte_pci_mem_rd32`` and
+  ``rte_pci_mem_wr32`` for applications to read/write PCI memory
+  resource.
 
 Removed Items
 -
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 3406e03b29..944288b132 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "private.h"
 
@@ -777,6 +778,83 @@ rte_pci_set_bus_master(struct rte_pci_device *dev, bool 
enable)
return 0;
 }
 
+static void *
+get_pci_mem_addr(const char *name, uint16_t idx, uint64_t offset)
+{
+   struct rte_pci_device *dev = NULL;
+   struct rte_pci_addr addr = {0};
+   struct rte_mem_resource *res = NULL;
+   bool found = false;
+
+   if (rte_pci_addr_parse(name, &addr)) {
+   RTE_LOG(ERR, EAL, "Wrong name format of PCI device (%s)", name);
+   return NULL;
+   }
+
+   FOREACH_DEVICE_ON_PCIBUS(dev) {
+   if (rte_pci_addr_cmp(&dev->addr, &addr)) {
+   continue;
+   } else {
+   found = true;
+   break;
+   }
+   }
+
+   if (!found) {
+   RTE_LOG(ERR, EAL, "Can not find the device (%s)", name);
+   return NULL;
+   }
+
+   res = &dev->mem_resource[idx];
+   if (idx >= PCI_MAX_RESOURCE || res->len == 0 || res->addr == NULL) {
+   RTE_LOG(ERR, EAL, "Invalid index of a mapped memory resourse");
+   return NULL;
+   }
+
+   if (offset >= res->len || offset + 4 > res->len) {
+   RTE_LOG(ERR, EAL, "Invalid offset of a memory resourse");
+   return NULL;
+   }
+
+   return (void *)((char *)res->addr + offset);
+}
+
+int
+rte_pci_mem_rd32(const char *name, uint16_t idx, uint32_t *data, uint64_t 
offset)
+{
+   void *reg_addr = NULL;
+
+   if (data == NULL) {
+   RTE_LOG(ERR, EAL, "NULL data buffer for PCI memory access");
+   return -EINVAL;
+   }
+
+   reg_addr = get_pci_mem_addr(name, idx, offset);
+   if (reg_addr == NULL)
+   return -EINVAL;
+
+   *data = rte_read32(reg_addr);
+   return 0;
+}
+
+int
+rte_pci_mem_wr32(const char *name, uint16_t idx, const uint32_t *data, 
uint64_t offset)
+{
+   void *reg_addr = NULL;
+
+   if (data == NULL) {
+   RTE_LOG(ERR, EAL, "NULL data buffer for PCI memory access");
+   return -EINVAL;
+   }
+
+   reg_addr = get_pci_mem_addr(name, idx, offset);
+   if (reg_addr == NULL)
+   return -EINVAL;
+
+   rte_write32(*data, reg_addr);
+   return 0;
+}
+
 struct rte_pci_bus rte_pci_bus = {
.bus = {
.scan = rte_pci_scan,
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 583470e831..21d9dd4289 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -392,6 +392,42 @@ void rte_pci_ioport_read(struct rte_pci_ioport *p,
 void rte_pci_ioport_write(struct rte_pci_ioport *p,
const void *data, size_t len, off_t offset);
 
+/**
+ * Read 4 bytes from PCI memory resource.
+ *
+ * @param name
+ *   PCI device name (e.g., :18:00.0).
+ * @param idx
+ *   Memory resource index.
+ * @param data
+ *   Data buffer where the bytes should be read into.
+ * @param offset
+ *   The offset into the PCI memory resource.
+ * @return
+ *  0 on success, negative value on error.
+ */
+__rte_experimental
+int
+rte_pci_mem_rd32(const char *name, uint16_t idx, uint32_t *data, uint64_t 
offset);
+
+/**
+ * Write 4 bytes to PCI memory resource.
+ *
+ * @param name
+ *   PCI device name (e.g., :18:00.0).
+ * @param idx
+ *   Memory resource index.
+ * @param data
+ *   Buffer of data that should be written to PCI memory.
+ *

[dpdk-dev] [PATCH 2/8] app/testpmd: use PCI memory resource access APIs

2021-09-09 Thread Chenbo Xia
Currently testpmd uses struct rte_pci_device to access PCI memory
resource. Since this structure will be internal later, this patch
replaces use of rte_pci_device with new PCI memory resource access
APIs to read/write BAR 0.

Signed-off-by: Chenbo Xia 
---
 app/test-pmd/config.c  | 38 +++---
 app/test-pmd/testpmd.h | 46 +-
 2 files changed, 35 insertions(+), 49 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 31d8ba1b91..a0ecda3a8e 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -950,10 +950,6 @@ vlan_id_is_invalid(uint16_t vlan_id)
 static int
 port_reg_off_is_invalid(portid_t port_id, uint32_t reg_off)
 {
-   const struct rte_pci_device *pci_dev;
-   const struct rte_bus *bus;
-   uint64_t pci_len;
-
if (reg_off & 0x3) {
fprintf(stderr,
"Port register offset 0x%X not aligned on a 4-byte 
boundary\n",
@@ -966,22 +962,6 @@ port_reg_off_is_invalid(portid_t port_id, uint32_t reg_off)
return 0;
}
 
-   bus = rte_bus_find_by_device(ports[port_id].dev_info.device);
-   if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(ports[port_id].dev_info.device);
-   } else {
-   fprintf(stderr, "Not a PCI device\n");
-   return 1;
-   }
-
-   pci_len = pci_dev->mem_resource[0].len;
-   if (reg_off >= pci_len) {
-   fprintf(stderr,
-   "Port %d: register offset %u (0x%X) out of port PCI 
resource (length=%"PRIu64")\n",
-   port_id, (unsigned int)reg_off, (unsigned int)reg_off,
-   pci_len);
-   return 1;
-   }
return 0;
 }
 
@@ -1009,14 +989,14 @@ port_reg_bit_display(portid_t port_id, uint32_t reg_off, 
uint8_t bit_x)
 {
uint32_t reg_v;
 
-
if (port_id_is_invalid(port_id, ENABLED_WARN))
return;
if (port_reg_off_is_invalid(port_id, reg_off))
return;
if (reg_bit_pos_is_invalid(bit_x))
return;
-   reg_v = port_id_pci_reg_read(port_id, reg_off);
+   if (port_id_pci_reg_read(port_id, reg_off, &reg_v))
+   return;
display_port_and_reg_off(port_id, (unsigned)reg_off);
printf("bit %d=%d\n", bit_x, (int) ((reg_v & (1 << bit_x)) >> bit_x));
 }
@@ -1042,7 +1022,8 @@ port_reg_bit_field_display(portid_t port_id, uint32_t 
reg_off,
else
l_bit = bit1_pos, h_bit = bit2_pos;
 
-   reg_v = port_id_pci_reg_read(port_id, reg_off);
+   if (port_id_pci_reg_read(port_id, reg_off, &reg_v))
+   return;
reg_v >>= l_bit;
if (h_bit < 31)
reg_v &= ((1 << (h_bit - l_bit + 1)) - 1);
@@ -1060,7 +1041,8 @@ port_reg_display(portid_t port_id, uint32_t reg_off)
return;
if (port_reg_off_is_invalid(port_id, reg_off))
return;
-   reg_v = port_id_pci_reg_read(port_id, reg_off);
+   if (port_id_pci_reg_read(port_id, reg_off, &reg_v))
+   return;
display_port_reg_value(port_id, reg_off, reg_v);
 }
 
@@ -1081,7 +1063,9 @@ port_reg_bit_set(portid_t port_id, uint32_t reg_off, 
uint8_t bit_pos,
(int) bit_v);
return;
}
-   reg_v = port_id_pci_reg_read(port_id, reg_off);
+   if (port_id_pci_reg_read(port_id, reg_off, &reg_v))
+   return;
+
if (bit_v == 0)
reg_v &= ~(1 << bit_pos);
else
@@ -1123,7 +1107,9 @@ port_reg_bit_field_set(portid_t port_id, uint32_t reg_off,
(unsigned)max_v, (unsigned)max_v);
return;
}
-   reg_v = port_id_pci_reg_read(port_id, reg_off);
+   if (port_id_pci_reg_read(port_id, reg_off, &reg_v))
+   return;
+
reg_v &= ~(max_v << l_bit); /* Keep unchanged bits */
reg_v |= (value << l_bit); /* Set changed bits */
port_id_pci_reg_write(port_id, reg_off, reg_v);
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 16a3598e48..7922807a6e 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -678,41 +678,41 @@ mbuf_pool_find(unsigned int sock_id, uint16_t idx)
 /**
  * Read/Write operations on a PCI register of a port.
  */
-static inline uint32_t
-port_pci_reg_read(struct rte_port *port, uint32_t reg_off)
+static inline int
+port_id_pci_reg_read(portid_t pt_id, uint32_t reg_off, uint32_t *reg_v)
 {
-   const struct rte_pci_device *pci_dev;
+   struct rte_port *port = &ports[(pt_id)];
+   char name[RTE_ETH_NAME_MAX_LEN];
const struct rte_bus *bus;
-   void *reg_addr;
-   uint32_t reg_v;
 
if (!port->dev_info.device) {
  

[dpdk-dev] [PATCH 3/8] examples/ethtool: use PCI library API to get PCI address

2021-09-09 Thread Chenbo Xia
Currently ethtool example uses struct rte_pci_device to know PCI
address of a device. As this API will be removed later in PCI bus,
this patch uses PCI library API to get the PCI address.

Signed-off-by: Chenbo Xia 
---
 examples/ethtool/lib/rte_ethtool.c | 14 +-
 examples/ethtool/meson.build   |  2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/examples/ethtool/lib/rte_ethtool.c 
b/examples/ethtool/lib/rte_ethtool.c
index 4132516307..89727c9f72 100644
--- a/examples/ethtool/lib/rte_ethtool.c
+++ b/examples/ethtool/lib/rte_ethtool.c
@@ -8,7 +8,7 @@
 #include 
 #include 
 #include 
-#include <rte_bus_pci.h>
+#include <rte_pci.h>
 #ifdef RTE_NET_IXGBE
 #include 
 #endif
@@ -23,8 +23,9 @@ rte_ethtool_get_drvinfo(uint16_t port_id, struct 
ethtool_drvinfo *drvinfo)
 {
struct rte_eth_dev_info dev_info;
struct rte_dev_reg_info reg_info;
-   const struct rte_pci_device *pci_dev;
const struct rte_bus *bus = NULL;
+   char name[RTE_ETH_NAME_MAX_LEN];
+   struct rte_pci_addr addr;
int n;
int ret;
 
@@ -56,11 +57,14 @@ rte_ethtool_get_drvinfo(uint16_t port_id, struct 
ethtool_drvinfo *drvinfo)
if (dev_info.device)
bus = rte_bus_find_by_device(dev_info.device);
if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(dev_info.device);
+   rte_eth_dev_get_name_by_port(port_id, name);
+   if (rte_pci_addr_parse(name, &addr)) {
+   printf("Failed to parse pci address\n");
+   return -1;
+   }
snprintf(drvinfo->bus_info, sizeof(drvinfo->bus_info),
"%04x:%02x:%02x.%x",
-   pci_dev->addr.domain, pci_dev->addr.bus,
-   pci_dev->addr.devid, pci_dev->addr.function);
+   addr.domain, addr.bus, addr.devid, addr.function);
} else {
snprintf(drvinfo->bus_info, sizeof(drvinfo->bus_info), "N/A");
}
diff --git a/examples/ethtool/meson.build b/examples/ethtool/meson.build
index d7f63d48af..c2dbf8ae5a 100644
--- a/examples/ethtool/meson.build
+++ b/examples/ethtool/meson.build
@@ -18,7 +18,7 @@ sources = files(
 )
 includes = include_directories('lib', 'ethtool-app')
 
-deps += 'bus_pci'
+deps += 'pci'
 if dpdk_conf.has('RTE_NET_IXGBE')
 deps += 'net_ixgbe'
 endif
-- 
2.17.1



[dpdk-dev] [PATCH 4/8] examples/kni: remove unused PCI bus header

2021-09-09 Thread Chenbo Xia
The header rte_bus_pci.h is included in kni example but nothing
in it is used. So remove it.

Signed-off-by: Chenbo Xia 
---
 examples/kni/main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/kni/main.c b/examples/kni/main.c
index beabb3c848..6dc335c0b5 100644
--- a/examples/kni/main.c
+++ b/examples/kni/main.c
@@ -31,7 +31,6 @@
 #include 
 #include 
 #include 
-#include <rte_bus_pci.h>
 #include 
 #include 
 #include 
-- 
2.17.1



[dpdk-dev] [PATCH 5/8] test/kni: remove setting of PCI ID and address

2021-09-09 Thread Chenbo Xia
PCI device id and address in structure rte_kni_conf are never used
in the test and kni library. So remove the related code.

Signed-off-by: Chenbo Xia 
---
 app/test/test_kni.c | 57 -
 1 file changed, 57 deletions(-)

diff --git a/app/test/test_kni.c b/app/test/test_kni.c
index 96733554b6..aa9a316c50 100644
--- a/app/test/test_kni.c
+++ b/app/test/test_kni.c
@@ -25,7 +25,6 @@ test_kni(void)
 #include 
 #include 
 #include 
-#include <rte_bus_pci.h>
 #include 
 #include 
 
@@ -424,32 +423,14 @@ test_kni_processing(uint16_t port_id, struct rte_mempool 
*mp)
unsigned i;
struct rte_kni *kni;
struct rte_kni_conf conf;
-   struct rte_eth_dev_info info;
struct rte_kni_ops ops;
-   const struct rte_pci_device *pci_dev;
-   const struct rte_bus *bus = NULL;
 
if (!mp)
return -1;
 
memset(&conf, 0, sizeof(conf));
-   memset(&info, 0, sizeof(info));
memset(&ops, 0, sizeof(ops));
 
-   ret = rte_eth_dev_info_get(port_id, &info);
-   if (ret != 0) {
-   printf("Error during getting device (port %u) info: %s\n",
-   port_id, strerror(-ret));
-   return -1;
-   }
-
-   if (info.device)
-   bus = rte_bus_find_by_device(info.device);
-   if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(info.device);
-   conf.addr = pci_dev->addr;
-   conf.id = pci_dev->id;
-   }
snprintf(conf.name, sizeof(conf.name), TEST_KNI_PORT);
 
/* core id 1 configured for kernel thread */
@@ -543,10 +524,7 @@ test_kni(void)
struct rte_kni *kni;
struct rte_mempool *mp;
struct rte_kni_conf conf;
-   struct rte_eth_dev_info info;
struct rte_kni_ops ops;
-   const struct rte_pci_device *pci_dev;
-   const struct rte_bus *bus;
FILE *fd;
DIR *dir;
char buf[16];
@@ -634,26 +612,9 @@ test_kni(void)
fclose(fd);
 
/* test of allocating KNI with NULL mempool pointer */
-   memset(&info, 0, sizeof(info));
memset(&conf, 0, sizeof(conf));
memset(&ops, 0, sizeof(ops));
 
-   ret = rte_eth_dev_info_get(port_id, &info);
-   if (ret != 0) {
-   printf("Error during getting device (port %u) info: %s\n",
-   port_id, strerror(-ret));
-   return -1;
-   }
-
-   if (info.device)
-   bus = rte_bus_find_by_device(info.device);
-   else
-   bus = NULL;
-   if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(info.device);
-   conf.addr = pci_dev->addr;
-   conf.id = pci_dev->id;
-   }
conf.group_id = port_id;
conf.mbuf_size = MAX_PACKET_SZ;
 
@@ -678,26 +639,8 @@ test_kni(void)
 
/* test of allocating KNI without a name */
memset(&conf, 0, sizeof(conf));
-   memset(&info, 0, sizeof(info));
memset(&ops, 0, sizeof(ops));
 
-   ret = rte_eth_dev_info_get(port_id, &info);
-   if (ret != 0) {
-   printf("Error during getting device (port %u) info: %s\n",
-   port_id, strerror(-ret));
-   ret = -1;
-   goto fail;
-   }
-
-   if (info.device)
-   bus = rte_bus_find_by_device(info.device);
-   else
-   bus = NULL;
-   if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(info.device);
-   conf.addr = pci_dev->addr;
-   conf.id = pci_dev->id;
-   }
conf.group_id = port_id;
conf.mbuf_size = MAX_PACKET_SZ;
 
-- 
2.17.1



[dpdk-dev] [PATCH 6/8] examples/ip_pipeline: remove setting of PCI ID and address

2021-09-09 Thread Chenbo Xia
PCI ID and address in structure rte_kni_conf are never used and
will be removed in kni library. So remove the setting of them
first in the example.

Signed-off-by: Chenbo Xia 
---
 examples/ip_pipeline/kni.c | 16 
 1 file changed, 16 deletions(-)

diff --git a/examples/ip_pipeline/kni.c b/examples/ip_pipeline/kni.c
index a2d3331cb0..fccecc3dc6 100644
--- a/examples/ip_pipeline/kni.c
+++ b/examples/ip_pipeline/kni.c
@@ -6,7 +6,6 @@
 #include 
 
 #include 
-#include <rte_bus_pci.h>
 #include 
 
 #include "kni.h"
@@ -100,16 +99,12 @@ kni_change_mtu(uint16_t port_id, unsigned int new_mtu)
 struct kni *
 kni_create(const char *name, struct kni_params *params)
 {
-   struct rte_eth_dev_info dev_info;
struct rte_kni_conf kni_conf;
struct rte_kni_ops kni_ops;
struct kni *kni;
struct mempool *mempool;
struct link *link;
struct rte_kni *k;
-   const struct rte_pci_device *pci_dev;
-   const struct rte_bus *bus = NULL;
-   int ret;
 
/* Check input params */
if ((name == NULL) ||
@@ -124,23 +119,12 @@ kni_create(const char *name, struct kni_params *params)
return NULL;
 
/* Resource create */
-   ret = rte_eth_dev_info_get(link->port_id, &dev_info);
-   if (ret != 0)
-   return NULL;
-
memset(&kni_conf, 0, sizeof(kni_conf));
strlcpy(kni_conf.name, name, RTE_KNI_NAMESIZE);
kni_conf.force_bind = params->force_bind;
kni_conf.core_id = params->thread_id;
kni_conf.group_id = link->port_id;
kni_conf.mbuf_size = mempool->buffer_size;
-   if (dev_info.device)
-   bus = rte_bus_find_by_device(dev_info.device);
-   if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(dev_info.device);
-   kni_conf.addr = pci_dev->addr;
-   kni_conf.id = pci_dev->id;
-   }
 
memset(&kni_ops, 0, sizeof(kni_ops));
kni_ops.port_id = link->port_id;
-- 
2.17.1



[dpdk-dev] [PATCH 7/8] kni: replace unused variable definition with reserved bytes

2021-09-09 Thread Chenbo Xia
PCI ID and address in structure rte_kni_conf are never used. And in
order not to break ABI, replace these variables with reserved bytes.

Signed-off-by: Chenbo Xia 
---
 lib/kni/rte_kni.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/kni/rte_kni.h b/lib/kni/rte_kni.h
index b0eaf46104..2281abbf6a 100644
--- a/lib/kni/rte_kni.h
+++ b/lib/kni/rte_kni.h
@@ -17,7 +17,6 @@
  * and burst transmit packets to KNI interfaces.
  */
 
-#include <rte_pci.h>
 #include 
 #include 
 #include 
@@ -66,8 +65,7 @@ struct rte_kni_conf {
uint32_t core_id;   /* Core ID to bind kernel thread on */
uint16_t group_id;  /* Group ID */
unsigned mbuf_size; /* mbuf size */
-   struct rte_pci_addr addr; /* depreciated */
-   struct rte_pci_id id; /* depreciated */
+   uint8_t rsvd[20];
 
__extension__
uint8_t force_bind : 1; /* Flag to bind kernel thread */
-- 
2.17.1



[dpdk-dev] [PATCH 8/8] bus/pci: remove ABIs in PCI bus

2021-09-09 Thread Chenbo Xia
As announced in the deprecation note, most of ABIs in PCI bus are
removed in this patch. Only the function rte_pci_dump is still ABI
and experimental APIs are kept for future promotion.

This patch creates a new file named pci_driver.h and moves most of
the content in original rte_bus_pci.h to it. After that, pci_driver.h
is considered the interface for drivers and rte_bus_pci.h for
applications. pci_driver.h is defined as driver_sdk_headers so that
out-of-tree drivers can use it.

Then this patch replaces the including of rte_bus_pci.h with pci_driver.h
in all related drivers.

Signed-off-by: Chenbo Xia 
---
 app/test/virtual_pmd.c|   2 +-
 doc/guides/rel_notes/release_21_11.rst|   2 +
 drivers/baseband/acc100/rte_acc100_pmd.c  |   2 +-
 .../fpga_5gnr_fec/rte_fpga_5gnr_fec.c |   2 +-
 drivers/baseband/fpga_lte_fec/fpga_lte_fec.c  |   2 +-
 drivers/bus/pci/bsd/pci.c |   1 -
 drivers/bus/pci/linux/pci.c   |   1 -
 drivers/bus/pci/linux/pci_uio.c   |   1 -
 drivers/bus/pci/linux/pci_vfio.c  |   1 -
 drivers/bus/pci/meson.build   |   4 +
 drivers/bus/pci/pci_common_uio.c  |   1 -
 drivers/bus/pci/pci_driver.h  | 402 ++
 drivers/bus/pci/pci_params.c  |   1 -
 drivers/bus/pci/private.h |   3 +-
 drivers/bus/pci/rte_bus_pci.h | 375 +---
 drivers/bus/pci/version.map   |  32 +-
 drivers/common/cnxk/roc_platform.h|   2 +-
 drivers/common/mlx5/linux/mlx5_common_verbs.c |   2 +-
 drivers/common/mlx5/mlx5_common_pci.c |   2 +-
 drivers/common/octeontx2/otx2_dev.h   |   2 +-
 drivers/common/octeontx2/otx2_sec_idev.c  |   2 +-
 drivers/common/qat/qat_device.h   |   2 +-
 drivers/common/qat/qat_qp.c   |   2 +-
 drivers/common/sfc_efx/sfc_efx.h  |   2 +-
 drivers/compress/mlx5/mlx5_compress.c |   2 +-
 drivers/compress/octeontx/otx_zip.h   |   2 +-
 drivers/compress/qat/qat_comp.c   |   2 +-
 drivers/crypto/ccp/ccp_dev.h  |   2 +-
 drivers/crypto/ccp/ccp_pci.h  |   2 +-
 drivers/crypto/ccp/rte_ccp_pmd.c  |   2 +-
 drivers/crypto/cnxk/cn10k_cryptodev.c |   2 +-
 drivers/crypto/cnxk/cn9k_cryptodev.c  |   2 +-
 drivers/crypto/mlx5/mlx5_crypto.c |   2 +-
 drivers/crypto/nitrox/nitrox_device.h |   2 +-
 drivers/crypto/octeontx/otx_cryptodev.c   |   2 +-
 drivers/crypto/octeontx/otx_cryptodev_ops.c   |   2 +-
 drivers/crypto/octeontx2/otx2_cryptodev.c |   2 +-
 drivers/crypto/qat/qat_sym.c  |   2 +-
 drivers/crypto/qat/qat_sym_pmd.c  |   2 +-
 drivers/crypto/virtio/virtio_cryptodev.c  |   2 +-
 drivers/crypto/virtio/virtio_pci.h|   2 +-
 drivers/event/dlb2/pf/dlb2_main.h |   2 +-
 drivers/event/dlb2/pf/dlb2_pf.c   |   2 +-
 drivers/event/octeontx/ssovf_probe.c  |   2 +-
 drivers/event/octeontx/timvf_probe.c  |   2 +-
 drivers/event/octeontx2/otx2_evdev.c  |   2 +-
 drivers/mempool/cnxk/cnxk_mempool.c   |   2 +-
 drivers/mempool/octeontx/octeontx_fpavf.c |   2 +-
 drivers/mempool/octeontx2/otx2_mempool.c  |   2 +-
 drivers/mempool/octeontx2/otx2_mempool.h  |   2 +-
 drivers/mempool/octeontx2/otx2_mempool_irq.c  |   2 +-
 drivers/meson.build   |   4 +
 drivers/net/ark/ark_ethdev.c  |   2 +-
 drivers/net/avp/avp_ethdev.c  |   2 +-
 drivers/net/bnx2x/bnx2x.h |   2 +-
 drivers/net/bnxt/bnxt.h   |   2 +-
 drivers/net/bonding/rte_eth_bond_args.c   |   2 +-
 drivers/net/cxgbe/base/adapter.h  |   2 +-
 drivers/net/cxgbe/cxgbe_ethdev.c  |   2 +-
 drivers/net/e1000/em_ethdev.c |   2 +-
 drivers/net/e1000/em_rxtx.c   |   2 +-
 drivers/net/e1000/igb_ethdev.c|   2 +-
 drivers/net/e1000/igb_pf.c|   2 +-
 drivers/net/ena/ena_ethdev.h  |   2 +-
 drivers/net/enic/base/vnic_dev.h  |   2 +-
 drivers/net/enic/enic_ethdev.c|   2 +-
 drivers/net/enic/enic_main.c  |   2 +-
 drivers/net/enic/enic_vf_representor.c|   2 +-
 drivers/net/hinic/base/hinic_pmd_hwdev.c  |   2 +-
 drivers/net/hinic/base/hinic_pmd_hwif.c   |   2 +-
 drivers/net/hinic/base/hinic_pmd_nicio.c  |   2 +-
 drivers/net/hinic/hinic_pmd_ethdev.c  |   2 +-
 drivers/net/hns3/hns3_ethdev.c|   2 +-
 drivers/net/hns3/hns3_rxtx.c  |   2 +-
 drivers/net/i40e/i40e_ethdev.c|   2 +-
 drivers/net/i40e/i40e_ethdev_vf.c |   2 +-
 drivers/net/i40e/i40e_vf_representor.c|   2 +-
 drivers/net/igc/igc_ethdev.c

[dpdk-dev] [PATCH v2 0/7] Removal of PCI bus ABIs

2021-09-17 Thread Chenbo Xia
As announced in the deprecation notice, most ABIs in PCI bus will be removed.

As there exist some applications that want to access PCI memory resource,
two new APIs are defined in Patch 1 and corresponding changes are applied
to testpmd in Patch 2.

Patch 3-4 clean up the unnecessary usage of PCI bus header in examples.

Patch 5-6 clean up the unused PCI related structure in kni library and related
tests and examples.

Patch 7 finally removes most of ABIs in PCI bus.

---
v2: 
 - Add check on call of port_id_pci_reg_write (Xiaoyun)
 - Combine two clean-up patches in test and example, and backport (David)

Chenbo Xia (7):
  bus/pci: add new memory resource access APIs
  app/testpmd: use PCI memory resource access APIs
  examples/ethtool: use PCI library API to get PCI address
  examples/kni: remove unused PCI bus header
  kni: remove unused PCI info from test and example
  kni: replace unused variable definition with reserved bytes
  bus/pci: remove ABIs in PCI bus

 app/test-pmd/config.c |  50 +--
 app/test-pmd/testpmd.h|  54 +--
 app/test/test_kni.c   |  57 ---
 app/test/virtual_pmd.c|   2 +-
 doc/guides/rel_notes/release_21_11.rst|   8 +
 drivers/baseband/acc100/rte_acc100_pmd.c  |   2 +-
 .../fpga_5gnr_fec/rte_fpga_5gnr_fec.c |   2 +-
 drivers/baseband/fpga_lte_fec/fpga_lte_fec.c  |   2 +-
 drivers/bus/pci/bsd/pci.c |   1 -
 drivers/bus/pci/linux/pci.c   |   1 -
 drivers/bus/pci/linux/pci_uio.c   |   1 -
 drivers/bus/pci/linux/pci_vfio.c  |   1 -
 drivers/bus/pci/meson.build   |   4 +
 drivers/bus/pci/pci_common.c  |  78 
 drivers/bus/pci/pci_common_uio.c  |   1 -
 drivers/bus/pci/pci_driver.h  | 402 ++
 drivers/bus/pci/pci_params.c  |   1 -
 drivers/bus/pci/private.h |   3 +-
 drivers/bus/pci/rte_bus_pci.h | 387 ++---
 drivers/bus/pci/version.map   |  28 +-
 drivers/common/cnxk/roc_platform.h|   2 +-
 drivers/common/mlx5/linux/mlx5_common_verbs.c |   2 +-
 drivers/common/mlx5/mlx5_common_pci.c |   2 +-
 drivers/common/octeontx2/otx2_dev.h   |   2 +-
 drivers/common/octeontx2/otx2_sec_idev.c  |   2 +-
 drivers/common/qat/qat_device.h   |   2 +-
 drivers/common/qat/qat_qp.c   |   2 +-
 drivers/common/sfc_efx/sfc_efx.h  |   2 +-
 drivers/compress/mlx5/mlx5_compress.c |   2 +-
 drivers/compress/octeontx/otx_zip.h   |   2 +-
 drivers/compress/qat/qat_comp.c   |   2 +-
 drivers/crypto/ccp/ccp_dev.h  |   2 +-
 drivers/crypto/ccp/ccp_pci.h  |   2 +-
 drivers/crypto/ccp/rte_ccp_pmd.c  |   2 +-
 drivers/crypto/cnxk/cn10k_cryptodev.c |   2 +-
 drivers/crypto/cnxk/cn9k_cryptodev.c  |   2 +-
 drivers/crypto/mlx5/mlx5_crypto.c |   2 +-
 drivers/crypto/nitrox/nitrox_device.h |   2 +-
 drivers/crypto/octeontx/otx_cryptodev.c   |   2 +-
 drivers/crypto/octeontx/otx_cryptodev_ops.c   |   2 +-
 drivers/crypto/octeontx2/otx2_cryptodev.c |   2 +-
 drivers/crypto/qat/qat_sym.c  |   2 +-
 drivers/crypto/qat/qat_sym_pmd.c  |   2 +-
 drivers/crypto/virtio/virtio_cryptodev.c  |   2 +-
 drivers/crypto/virtio/virtio_pci.h|   2 +-
 drivers/event/dlb2/pf/dlb2_main.h |   2 +-
 drivers/event/dlb2/pf/dlb2_pf.c   |   2 +-
 drivers/event/octeontx/ssovf_probe.c  |   2 +-
 drivers/event/octeontx/timvf_probe.c  |   2 +-
 drivers/event/octeontx2/otx2_evdev.c  |   2 +-
 drivers/mempool/cnxk/cnxk_mempool.c   |   2 +-
 drivers/mempool/octeontx/octeontx_fpavf.c |   2 +-
 drivers/mempool/octeontx2/otx2_mempool.c  |   2 +-
 drivers/mempool/octeontx2/otx2_mempool.h  |   2 +-
 drivers/mempool/octeontx2/otx2_mempool_irq.c  |   2 +-
 drivers/meson.build   |   4 +
 drivers/net/ark/ark_ethdev.c  |   2 +-
 drivers/net/avp/avp_ethdev.c  |   2 +-
 drivers/net/bnx2x/bnx2x.h |   2 +-
 drivers/net/bnxt/bnxt.h   |   2 +-
 drivers/net/bonding/rte_eth_bond_args.c   |   2 +-
 drivers/net/cxgbe/base/adapter.h  |   2 +-
 drivers/net/cxgbe/cxgbe_ethdev.c  |   2 +-
 drivers/net/e1000/em_ethdev.c |   2 +-
 drivers/net/e1000/em_rxtx.c   |   2 +-
 drivers/net/e1000/igb_ethdev.c|   2 +-
 drivers/net/e1000/igb_pf.c|   2 +-
 drivers/net/ena/ena_ethdev.h  |   2 +-
 drivers/net/enic/base/vnic_dev.h  |   2 +-
 drivers/net/enic/enic_ethdev.c|   2 +-
 drivers/net/enic/enic_main.c  |   2 +-
 drivers/net

[dpdk-dev] [PATCH v2 1/7] bus/pci: add new memory resource access APIs

2021-09-17 Thread Chenbo Xia
Some applications want to access PCI memory resource. Currently
applications use struct rte_pci_device to access it. Since the
structure will be made internal later, this patch adds two APIs
for memory resource access.

Signed-off-by: Chenbo Xia 
Acked-by: Ray Kinsella 
---
 doc/guides/rel_notes/release_21_11.rst |  6 ++
 drivers/bus/pci/pci_common.c   | 78 ++
 drivers/bus/pci/rte_bus_pci.h  | 36 
 drivers/bus/pci/version.map|  4 ++
 4 files changed, 124 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_11.rst 
b/doc/guides/rel_notes/release_21_11.rst
index 1d56fa9bf2..ce3f554e10 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -55,6 +55,12 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Added new memory resource read/write APIs in PCI bus.**
+
+  Added new memory resource read/write APIs ``rte_pci_mem_rd32`` and
+  ``rte_pci_mem_wr32`` for applications to read/write PCI memory
+  resource.
+
 * **Enabled new devargs parser.**
 
   * Enabled devargs syntax
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 3406e03b29..5bc7c8e2c7 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "private.h"
 
@@ -777,6 +778,83 @@ rte_pci_set_bus_master(struct rte_pci_device *dev, bool 
enable)
return 0;
 }
 
+static void *
+get_pci_mem_addr(const char *name, uint16_t idx, uint64_t offset)
+{
+   struct rte_pci_device *dev = NULL;
+   struct rte_pci_addr addr = {0};
+   struct rte_mem_resource *res = NULL;
+   bool found = false;
+
+   if (rte_pci_addr_parse(name, &addr)) {
+   RTE_LOG(ERR, EAL, "Wrong name format of PCI device (%s)", name);
+   return NULL;
+   }
+
+   FOREACH_DEVICE_ON_PCIBUS(dev) {
+   if (rte_pci_addr_cmp(&dev->addr, &addr)) {
+   continue;
+   } else {
+   found = true;
+   break;
+   }
+   }
+
+   if (!found) {
+   RTE_LOG(ERR, EAL, "Can not find the device (%s)", name);
+   return NULL;
+   }
+
+   res = &dev->mem_resource[idx];
+   if (idx >= PCI_MAX_RESOURCE || res->len == 0 || res->addr == NULL) {
+   RTE_LOG(ERR, EAL, "Invalid index of a mapped memory resourse");
+   return NULL;
+   }
+
+   if (offset + 4 > res->len) {
+   RTE_LOG(ERR, EAL, "Invalid offset of a memory resourse");
+   return NULL;
+   }
+
+   return (void *)((char *)res->addr + offset);
+}
+
+int
+rte_pci_mem_rd32(const char *name, uint16_t idx, uint32_t *data, uint64_t 
offset)
+{
+   void *reg_addr = NULL;
+
+   if (data == NULL) {
+   RTE_LOG(ERR, EAL, "NULL data buffer for PCI memory access");
+   return -EINVAL;
+   }
+
+   reg_addr = get_pci_mem_addr(name, idx, offset);
+   if (reg_addr == NULL)
+   return -EINVAL;
+
+   *data = rte_read32(reg_addr);
+   return 0;
+}
+
+int
+rte_pci_mem_wr32(const char *name, uint16_t idx, const uint32_t *data, 
uint64_t offset)
+{
+   void *reg_addr = NULL;
+
+   if (data == NULL) {
+   RTE_LOG(ERR, EAL, "NULL data buffer for PCI memory access");
+   return -EINVAL;
+   }
+
+   reg_addr = get_pci_mem_addr(name, idx, offset);
+   if (reg_addr == NULL)
+   return -EINVAL;
+
+   rte_write32(*data, reg_addr);
+   return 0;
+}
+
 struct rte_pci_bus rte_pci_bus = {
.bus = {
.scan = rte_pci_scan,
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 583470e831..21d9dd4289 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -392,6 +392,42 @@ void rte_pci_ioport_read(struct rte_pci_ioport *p,
 void rte_pci_ioport_write(struct rte_pci_ioport *p,
const void *data, size_t len, off_t offset);
 
+/**
+ * Read 4 bytes from PCI memory resource.
+ *
+ * @param name
+ *   PCI device name (e.g., 0000:18:00.0).
+ * @param idx
+ *   Memory resource index.
+ * @param data
+ *   Data buffer where the bytes should be read into.
+ * @param offset
+ *   The offset into the PCI memory resource.
+ * @return
+ *  0 on success, negative value on error.
+ */
+__rte_experimental
+int
+rte_pci_mem_rd32(const char *name, uint16_t idx, uint32_t *data, uint64_t 
offset);
+
+/**
+ * Write 4 bytes to PCI memory resource.
+ *
+ * @param name
+ *   PCI device name (e.g., 0000:18:00.0).
+ * @param idx
+ *   Memory resource index.
+ * @param data
+ *   Buffer of data that should be written

[dpdk-dev] [PATCH v2 2/7] app/testpmd: use PCI memory resource access APIs

2021-09-17 Thread Chenbo Xia
Currently testpmd uses struct rte_pci_device to access PCI memory
resource. Since this structure will be internal later, this patch
replaces use of rte_pci_device with new PCI memory resource access
APIs to read/write BAR 0.

Signed-off-by: Chenbo Xia 
---
 app/test-pmd/config.c  | 50 ++
 app/test-pmd/testpmd.h | 54 ++
 2 files changed, 46 insertions(+), 58 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index f5765b34f7..67be2f9ee7 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -948,10 +948,6 @@ vlan_id_is_invalid(uint16_t vlan_id)
 static int
 port_reg_off_is_invalid(portid_t port_id, uint32_t reg_off)
 {
-   const struct rte_pci_device *pci_dev;
-   const struct rte_bus *bus;
-   uint64_t pci_len;
-
if (reg_off & 0x3) {
fprintf(stderr,
"Port register offset 0x%X not aligned on a 4-byte 
boundary\n",
@@ -964,22 +960,6 @@ port_reg_off_is_invalid(portid_t port_id, uint32_t reg_off)
return 0;
}
 
-   bus = rte_bus_find_by_device(ports[port_id].dev_info.device);
-   if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(ports[port_id].dev_info.device);
-   } else {
-   fprintf(stderr, "Not a PCI device\n");
-   return 1;
-   }
-
-   pci_len = pci_dev->mem_resource[0].len;
-   if (reg_off >= pci_len) {
-   fprintf(stderr,
-   "Port %d: register offset %u (0x%X) out of port PCI 
resource (length=%"PRIu64")\n",
-   port_id, (unsigned int)reg_off, (unsigned int)reg_off,
-   pci_len);
-   return 1;
-   }
return 0;
 }
 
@@ -1007,14 +987,14 @@ port_reg_bit_display(portid_t port_id, uint32_t reg_off, 
uint8_t bit_x)
 {
uint32_t reg_v;
 
-
if (port_id_is_invalid(port_id, ENABLED_WARN))
return;
if (port_reg_off_is_invalid(port_id, reg_off))
return;
if (reg_bit_pos_is_invalid(bit_x))
return;
-   reg_v = port_id_pci_reg_read(port_id, reg_off);
+   if (port_id_pci_reg_read(port_id, reg_off, &reg_v))
+   return;
display_port_and_reg_off(port_id, (unsigned)reg_off);
printf("bit %d=%d\n", bit_x, (int) ((reg_v & (1 << bit_x)) >> bit_x));
 }
@@ -1040,7 +1020,8 @@ port_reg_bit_field_display(portid_t port_id, uint32_t 
reg_off,
else
l_bit = bit1_pos, h_bit = bit2_pos;
 
-   reg_v = port_id_pci_reg_read(port_id, reg_off);
+   if (port_id_pci_reg_read(port_id, reg_off, &reg_v))
+   return;
reg_v >>= l_bit;
if (h_bit < 31)
reg_v &= ((1 << (h_bit - l_bit + 1)) - 1);
@@ -1058,7 +1039,8 @@ port_reg_display(portid_t port_id, uint32_t reg_off)
return;
if (port_reg_off_is_invalid(port_id, reg_off))
return;
-   reg_v = port_id_pci_reg_read(port_id, reg_off);
+   if (port_id_pci_reg_read(port_id, reg_off, &reg_v))
+   return;
display_port_reg_value(port_id, reg_off, reg_v);
 }
 
@@ -1079,13 +1061,15 @@ port_reg_bit_set(portid_t port_id, uint32_t reg_off, 
uint8_t bit_pos,
(int) bit_v);
return;
}
-   reg_v = port_id_pci_reg_read(port_id, reg_off);
+   if (port_id_pci_reg_read(port_id, reg_off, &reg_v))
+   return;
+
if (bit_v == 0)
reg_v &= ~(1 << bit_pos);
else
reg_v |= (1 << bit_pos);
-   port_id_pci_reg_write(port_id, reg_off, reg_v);
-   display_port_reg_value(port_id, reg_off, reg_v);
+   if (!port_id_pci_reg_write(port_id, reg_off, reg_v))
+   display_port_reg_value(port_id, reg_off, reg_v);
 }
 
 void
@@ -1121,11 +1105,13 @@ port_reg_bit_field_set(portid_t port_id, uint32_t 
reg_off,
(unsigned)max_v, (unsigned)max_v);
return;
}
-   reg_v = port_id_pci_reg_read(port_id, reg_off);
+   if (port_id_pci_reg_read(port_id, reg_off, &reg_v))
+   return;
+
reg_v &= ~(max_v << l_bit); /* Keep unchanged bits */
reg_v |= (value << l_bit); /* Set changed bits */
-   port_id_pci_reg_write(port_id, reg_off, reg_v);
-   display_port_reg_value(port_id, reg_off, reg_v);
+   if (!port_id_pci_reg_write(port_id, reg_off, reg_v))
+   display_port_reg_value(port_id, reg_off, reg_v);
 }
 
 void
@@ -1135,8 +1121,8 @@ port_reg_set(portid_t port_id, uint32_t reg_off, uint32_t 
reg_v)
return;
if (port_reg_off_is_invalid(port_id, reg_off))
return;
-   port_id_pci_reg_write(port_id, reg_off, reg_v);
-   

[dpdk-dev] [PATCH v2 3/7] examples/ethtool: use PCI library API to get PCI address

2021-09-17 Thread Chenbo Xia
Currently ethtool example uses struct rte_pci_device to know PCI
address of a device. As this API will be removed later in PCI bus,
this patch uses PCI library API to get the PCI address.

Signed-off-by: Chenbo Xia 
---
 examples/ethtool/lib/rte_ethtool.c | 14 +-
 examples/ethtool/meson.build   |  2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/examples/ethtool/lib/rte_ethtool.c 
b/examples/ethtool/lib/rte_ethtool.c
index 4132516307..89727c9f72 100644
--- a/examples/ethtool/lib/rte_ethtool.c
+++ b/examples/ethtool/lib/rte_ethtool.c
@@ -8,7 +8,7 @@
 #include 
 #include 
 #include 
-#include <rte_bus_pci.h>
+#include <rte_pci.h>
 #ifdef RTE_NET_IXGBE
 #include 
 #endif
@@ -23,8 +23,9 @@ rte_ethtool_get_drvinfo(uint16_t port_id, struct 
ethtool_drvinfo *drvinfo)
 {
struct rte_eth_dev_info dev_info;
struct rte_dev_reg_info reg_info;
-   const struct rte_pci_device *pci_dev;
const struct rte_bus *bus = NULL;
+   char name[RTE_ETH_NAME_MAX_LEN];
+   struct rte_pci_addr addr;
int n;
int ret;
 
@@ -56,11 +57,14 @@ rte_ethtool_get_drvinfo(uint16_t port_id, struct 
ethtool_drvinfo *drvinfo)
if (dev_info.device)
bus = rte_bus_find_by_device(dev_info.device);
if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(dev_info.device);
+   rte_eth_dev_get_name_by_port(port_id, name);
+   if (rte_pci_addr_parse(name, &addr)) {
+   printf("Failed to parse pci address\n");
+   return -1;
+   }
snprintf(drvinfo->bus_info, sizeof(drvinfo->bus_info),
"%04x:%02x:%02x.%x",
-   pci_dev->addr.domain, pci_dev->addr.bus,
-   pci_dev->addr.devid, pci_dev->addr.function);
+   addr.domain, addr.bus, addr.devid, addr.function);
} else {
snprintf(drvinfo->bus_info, sizeof(drvinfo->bus_info), "N/A");
}
diff --git a/examples/ethtool/meson.build b/examples/ethtool/meson.build
index d7f63d48af..c2dbf8ae5a 100644
--- a/examples/ethtool/meson.build
+++ b/examples/ethtool/meson.build
@@ -18,7 +18,7 @@ sources = files(
 )
 includes = include_directories('lib', 'ethtool-app')
 
-deps += 'bus_pci'
+deps += 'pci'
 if dpdk_conf.has('RTE_NET_IXGBE')
 deps += 'net_ixgbe'
 endif
-- 
2.17.1



[dpdk-dev] [PATCH v2 4/7] examples/kni: remove unused PCI bus header

2021-09-17 Thread Chenbo Xia
The header rte_bus_pci.h is included in kni example but nothing
in it is used. So remove it.

Signed-off-by: Chenbo Xia 
Acked-by: Ferruh Yigit 
---
 examples/kni/main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/kni/main.c b/examples/kni/main.c
index beabb3c848..6dc335c0b5 100644
--- a/examples/kni/main.c
+++ b/examples/kni/main.c
@@ -31,7 +31,6 @@
 #include 
 #include 
 #include 
-#include <rte_bus_pci.h>
 #include 
 #include 
 #include 
-- 
2.17.1



[dpdk-dev] [PATCH v2 5/7] kni: remove unused PCI info from test and example

2021-09-17 Thread Chenbo Xia
PCI device id and address in structure rte_kni_conf are never used
in the test, example and kni library. So remove the related code.

Fixes: ea6b39b5b847 ("kni: remove ethtool support")
Cc: sta...@dpdk.org

Signed-off-by: Chenbo Xia 
Acked-by: Ferruh Yigit 
---
 app/test/test_kni.c| 57 --
 examples/ip_pipeline/kni.c | 16 ---
 2 files changed, 73 deletions(-)

diff --git a/app/test/test_kni.c b/app/test/test_kni.c
index 96733554b6..aa9a316c50 100644
--- a/app/test/test_kni.c
+++ b/app/test/test_kni.c
@@ -25,7 +25,6 @@ test_kni(void)
 #include 
 #include 
 #include 
-#include <rte_bus_pci.h>
 #include 
 #include 
 
@@ -424,32 +423,14 @@ test_kni_processing(uint16_t port_id, struct rte_mempool 
*mp)
unsigned i;
struct rte_kni *kni;
struct rte_kni_conf conf;
-   struct rte_eth_dev_info info;
struct rte_kni_ops ops;
-   const struct rte_pci_device *pci_dev;
-   const struct rte_bus *bus = NULL;
 
if (!mp)
return -1;
 
memset(&conf, 0, sizeof(conf));
-   memset(&info, 0, sizeof(info));
memset(&ops, 0, sizeof(ops));
 
-   ret = rte_eth_dev_info_get(port_id, &info);
-   if (ret != 0) {
-   printf("Error during getting device (port %u) info: %s\n",
-   port_id, strerror(-ret));
-   return -1;
-   }
-
-   if (info.device)
-   bus = rte_bus_find_by_device(info.device);
-   if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(info.device);
-   conf.addr = pci_dev->addr;
-   conf.id = pci_dev->id;
-   }
snprintf(conf.name, sizeof(conf.name), TEST_KNI_PORT);
 
/* core id 1 configured for kernel thread */
@@ -543,10 +524,7 @@ test_kni(void)
struct rte_kni *kni;
struct rte_mempool *mp;
struct rte_kni_conf conf;
-   struct rte_eth_dev_info info;
struct rte_kni_ops ops;
-   const struct rte_pci_device *pci_dev;
-   const struct rte_bus *bus;
FILE *fd;
DIR *dir;
char buf[16];
@@ -634,26 +612,9 @@ test_kni(void)
fclose(fd);
 
/* test of allocating KNI with NULL mempool pointer */
-   memset(&info, 0, sizeof(info));
memset(&conf, 0, sizeof(conf));
memset(&ops, 0, sizeof(ops));
 
-   ret = rte_eth_dev_info_get(port_id, &info);
-   if (ret != 0) {
-   printf("Error during getting device (port %u) info: %s\n",
-   port_id, strerror(-ret));
-   return -1;
-   }
-
-   if (info.device)
-   bus = rte_bus_find_by_device(info.device);
-   else
-   bus = NULL;
-   if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(info.device);
-   conf.addr = pci_dev->addr;
-   conf.id = pci_dev->id;
-   }
conf.group_id = port_id;
conf.mbuf_size = MAX_PACKET_SZ;
 
@@ -678,26 +639,8 @@ test_kni(void)
 
/* test of allocating KNI without a name */
memset(&conf, 0, sizeof(conf));
-   memset(&info, 0, sizeof(info));
memset(&ops, 0, sizeof(ops));
 
-   ret = rte_eth_dev_info_get(port_id, &info);
-   if (ret != 0) {
-   printf("Error during getting device (port %u) info: %s\n",
-   port_id, strerror(-ret));
-   ret = -1;
-   goto fail;
-   }
-
-   if (info.device)
-   bus = rte_bus_find_by_device(info.device);
-   else
-   bus = NULL;
-   if (bus && !strcmp(bus->name, "pci")) {
-   pci_dev = RTE_DEV_TO_PCI(info.device);
-   conf.addr = pci_dev->addr;
-   conf.id = pci_dev->id;
-   }
conf.group_id = port_id;
conf.mbuf_size = MAX_PACKET_SZ;
 
diff --git a/examples/ip_pipeline/kni.c b/examples/ip_pipeline/kni.c
index a2d3331cb0..fccecc3dc6 100644
--- a/examples/ip_pipeline/kni.c
+++ b/examples/ip_pipeline/kni.c
@@ -6,7 +6,6 @@
 #include 
 
 #include 
-#include 
 #include 
 
 #include "kni.h"
@@ -100,16 +99,12 @@ kni_change_mtu(uint16_t port_id, unsigned int new_mtu)
 struct kni *
 kni_create(const char *name, struct kni_params *params)
 {
-   struct rte_eth_dev_info dev_info;
struct rte_kni_conf kni_conf;
struct rte_kni_ops kni_ops;
struct kni *kni;
struct mempool *mempool;
struct link *link;
struct rte_kni *k;
-   const struct rte_pci_device *pci_dev;
-   const struct rte_bus *bus = NULL;
-   int ret;
 
/* Check input params */
if ((name == NULL) ||
@@ -124,23 +119,12 @@ kni_create(const char *name, struct kni_params *params)
return NULL;
 
 

[dpdk-dev] [PATCH v2 6/7] kni: replace unused variable definition with reserved bytes

2021-09-17 Thread Chenbo Xia
PCI ID and address in structure rte_kni_conf are never used. And in
order not to break ABI, replace these variables with reserved bytes.

Signed-off-by: Chenbo Xia 
---
 lib/kni/rte_kni.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/kni/rte_kni.h b/lib/kni/rte_kni.h
index b0eaf46104..2281abbf6a 100644
--- a/lib/kni/rte_kni.h
+++ b/lib/kni/rte_kni.h
@@ -17,7 +17,6 @@
  * and burst transmit packets to KNI interfaces.
  */
 
-#include 
 #include 
 #include 
 #include 
@@ -66,8 +65,7 @@ struct rte_kni_conf {
uint32_t core_id;   /* Core ID to bind kernel thread on */
uint16_t group_id;  /* Group ID */
unsigned mbuf_size; /* mbuf size */
-   struct rte_pci_addr addr; /* depreciated */
-   struct rte_pci_id id; /* depreciated */
+   uint8_t rsvd[20];
 
__extension__
uint8_t force_bind : 1; /* Flag to bind kernel thread */
-- 
2.17.1



[dpdk-dev] [PATCH v2 7/7] bus/pci: remove ABIs in PCI bus

2021-09-17 Thread Chenbo Xia
As announced in the deprecation note, most of the ABIs in the PCI bus are
removed in this patch. Only the function rte_pci_dump remains part of the
ABI, and experimental APIs are kept for future promotion.

This patch creates a new file named pci_driver.h and moves most of
the content in original rte_bus_pci.h to it. After that, pci_driver.h
is considered the interface for drivers and rte_bus_pci.h for
applications. pci_driver.h is defined as driver_sdk_headers so that
out-of-tree drivers can use it.

Then this patch replaces the inclusion of rte_bus_pci.h with pci_driver.h
in all related drivers.

Signed-off-by: Chenbo Xia 
Acked-by: Ray Kinsella 
Acked-by: Rosen Xu 
---
 app/test/virtual_pmd.c|   2 +-
 doc/guides/rel_notes/release_21_11.rst|   2 +
 drivers/baseband/acc100/rte_acc100_pmd.c  |   2 +-
 .../fpga_5gnr_fec/rte_fpga_5gnr_fec.c |   2 +-
 drivers/baseband/fpga_lte_fec/fpga_lte_fec.c  |   2 +-
 drivers/bus/pci/bsd/pci.c |   1 -
 drivers/bus/pci/linux/pci.c   |   1 -
 drivers/bus/pci/linux/pci_uio.c   |   1 -
 drivers/bus/pci/linux/pci_vfio.c  |   1 -
 drivers/bus/pci/meson.build   |   4 +
 drivers/bus/pci/pci_common_uio.c  |   1 -
 drivers/bus/pci/pci_driver.h  | 402 ++
 drivers/bus/pci/pci_params.c  |   1 -
 drivers/bus/pci/private.h |   3 +-
 drivers/bus/pci/rte_bus_pci.h | 375 +---
 drivers/bus/pci/version.map   |  32 +-
 drivers/common/cnxk/roc_platform.h|   2 +-
 drivers/common/mlx5/linux/mlx5_common_verbs.c |   2 +-
 drivers/common/mlx5/mlx5_common_pci.c |   2 +-
 drivers/common/octeontx2/otx2_dev.h   |   2 +-
 drivers/common/octeontx2/otx2_sec_idev.c  |   2 +-
 drivers/common/qat/qat_device.h   |   2 +-
 drivers/common/qat/qat_qp.c   |   2 +-
 drivers/common/sfc_efx/sfc_efx.h  |   2 +-
 drivers/compress/mlx5/mlx5_compress.c |   2 +-
 drivers/compress/octeontx/otx_zip.h   |   2 +-
 drivers/compress/qat/qat_comp.c   |   2 +-
 drivers/crypto/ccp/ccp_dev.h  |   2 +-
 drivers/crypto/ccp/ccp_pci.h  |   2 +-
 drivers/crypto/ccp/rte_ccp_pmd.c  |   2 +-
 drivers/crypto/cnxk/cn10k_cryptodev.c |   2 +-
 drivers/crypto/cnxk/cn9k_cryptodev.c  |   2 +-
 drivers/crypto/mlx5/mlx5_crypto.c |   2 +-
 drivers/crypto/nitrox/nitrox_device.h |   2 +-
 drivers/crypto/octeontx/otx_cryptodev.c   |   2 +-
 drivers/crypto/octeontx/otx_cryptodev_ops.c   |   2 +-
 drivers/crypto/octeontx2/otx2_cryptodev.c |   2 +-
 drivers/crypto/qat/qat_sym.c  |   2 +-
 drivers/crypto/qat/qat_sym_pmd.c  |   2 +-
 drivers/crypto/virtio/virtio_cryptodev.c  |   2 +-
 drivers/crypto/virtio/virtio_pci.h|   2 +-
 drivers/event/dlb2/pf/dlb2_main.h |   2 +-
 drivers/event/dlb2/pf/dlb2_pf.c   |   2 +-
 drivers/event/octeontx/ssovf_probe.c  |   2 +-
 drivers/event/octeontx/timvf_probe.c  |   2 +-
 drivers/event/octeontx2/otx2_evdev.c  |   2 +-
 drivers/mempool/cnxk/cnxk_mempool.c   |   2 +-
 drivers/mempool/octeontx/octeontx_fpavf.c |   2 +-
 drivers/mempool/octeontx2/otx2_mempool.c  |   2 +-
 drivers/mempool/octeontx2/otx2_mempool.h  |   2 +-
 drivers/mempool/octeontx2/otx2_mempool_irq.c  |   2 +-
 drivers/meson.build   |   4 +
 drivers/net/ark/ark_ethdev.c  |   2 +-
 drivers/net/avp/avp_ethdev.c  |   2 +-
 drivers/net/bnx2x/bnx2x.h |   2 +-
 drivers/net/bnxt/bnxt.h   |   2 +-
 drivers/net/bonding/rte_eth_bond_args.c   |   2 +-
 drivers/net/cxgbe/base/adapter.h  |   2 +-
 drivers/net/cxgbe/cxgbe_ethdev.c  |   2 +-
 drivers/net/e1000/em_ethdev.c |   2 +-
 drivers/net/e1000/em_rxtx.c   |   2 +-
 drivers/net/e1000/igb_ethdev.c|   2 +-
 drivers/net/e1000/igb_pf.c|   2 +-
 drivers/net/ena/ena_ethdev.h  |   2 +-
 drivers/net/enic/base/vnic_dev.h  |   2 +-
 drivers/net/enic/enic_ethdev.c|   2 +-
 drivers/net/enic/enic_main.c  |   2 +-
 drivers/net/enic/enic_vf_representor.c|   2 +-
 drivers/net/hinic/base/hinic_pmd_hwdev.c  |   2 +-
 drivers/net/hinic/base/hinic_pmd_hwif.c   |   2 +-
 drivers/net/hinic/base/hinic_pmd_nicio.c  |   2 +-
 drivers/net/hinic/hinic_pmd_ethdev.c  |   2 +-
 drivers/net/hns3/hns3_ethdev.c|   2 +-
 drivers/net/hns3/hns3_rxtx.c  |   2 +-
 drivers/net/i40e/i40e_ethdev.c|   2 +-
 drivers/net/i40e/i40e_ethdev_vf.c |   2 +-
 drivers/net/i40e/i40e_vf_representor.c|   2

[dpdk-dev] [PATCH] net/virtio: fix uninitialized duplex mode

2021-07-26 Thread Chenbo Xia
When virtio front-end initializes, the duplex mode should be set
unknown before reading any duplex mode information from configuration
space. This patch fixes the issue that duplex mode is by default set
to zero, which equals ETH_LINK_HALF_DUPLEX. This will lead to duplex
mode being half duplex when the front-end does not have the feature
named VIRTIO_NET_F_SPEED_DUPLEX.

Fixes: 1357b4b36246 ("net/virtio: support Virtio link speed feature")
Cc: sta...@dpdk.org

Signed-off-by: Chenbo Xia 
---
 drivers/net/virtio/virtio_ethdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 72d3dda71f..9061db4e41 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1901,6 +1901,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
if (ret < 0)
return ret;
hw->speed = speed;
+   hw->duplex = DUPLEX_UNKNOWN;
 
/* Allocate memory for storing MAC addresses */
eth_dev->data->mac_addrs = rte_zmalloc("virtio",
-- 
2.17.1



[dpdk-dev] [PATCH] vhost: announce experimental tag removal of vhost APIs

2021-07-30 Thread Chenbo Xia
This patch announces the experimental tag removal of 10 vhost APIs,
which have been experimental for more than 2 years. All APIs could
be made stable in DPDK 21.11.

Signed-off-by: Chenbo Xia 
Acked-by: Maxime Coquelin 
---
 doc/guides/rel_notes/deprecation.rst | 8 
 1 file changed, 8 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 9584d6bfd7..f97a9d0058 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -147,3 +147,11 @@ Deprecation Notices
 * cmdline: ``cmdline`` structure will be made opaque to hide platform-specific
   content. On Linux and FreeBSD, supported prior to DPDK 20.11,
   original structure will be kept until DPDK 21.11.
+
+* vhost: The experimental tags of ``rte_vhost_driver_get_protocol_features``,
+  ``rte_vhost_driver_get_queue_num``, ``rte_vhost_crypto_create``,
+  ``rte_vhost_crypto_free``, ``rte_vhost_crypto_fetch_requests``,
+  ``rte_vhost_crypto_finalize_requests``, ``rte_vhost_crypto_set_zero_copy``,
+  ``rte_vhost_va_from_guest_pa``, ``rte_vhost_extern_callback_register``,
+  and ``rte_vhost_driver_set_protocol_features`` APIs will be removed and the
+  APIs will be made stable in DPDK 21.11.
\ No newline at end of file
-- 
2.17.1



[dpdk-dev] [PATCH v2] vhost: announce experimental tag removal of vhost APIs

2021-07-30 Thread Chenbo Xia
This patch announces the experimental tag removal of 10 vhost APIs,
which have been experimental for more than 2 years. All APIs could
be made stable in DPDK 21.11.

Signed-off-by: Chenbo Xia 
Acked-by: Maxime Coquelin 
---
 doc/guides/rel_notes/deprecation.rst | 8 
 1 file changed, 8 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 9584d6bfd7..5d5b7884d7 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -147,3 +147,11 @@ Deprecation Notices
 * cmdline: ``cmdline`` structure will be made opaque to hide platform-specific
   content. On Linux and FreeBSD, supported prior to DPDK 20.11,
   original structure will be kept until DPDK 21.11.
+
+* vhost: The experimental tags of ``rte_vhost_driver_get_protocol_features``,
+  ``rte_vhost_driver_get_queue_num``, ``rte_vhost_crypto_create``,
+  ``rte_vhost_crypto_free``, ``rte_vhost_crypto_fetch_requests``,
+  ``rte_vhost_crypto_finalize_requests``, ``rte_vhost_crypto_set_zero_copy``,
+  ``rte_vhost_va_from_guest_pa``, ``rte_vhost_extern_callback_register``,
+  and ``rte_vhost_driver_set_protocol_features`` APIs will be removed and the
+  APIs will be made stable in DPDK 21.11.
-- 
2.17.1



[dpdk-dev] [PATCH] examples/vhost: fix unchecked return value

2021-02-18 Thread Chenbo Xia
This patch fixes unchecked return value for rte_vhost_get_mem_table(),
which is reported by coverity.

Coverity issue: 364233
Fixes: ca059fa5e290 ("examples/vhost: demonstrate the new generic APIs")
Cc: sta...@dpdk.org

Signed-off-by: Chenbo Xia 
---
 examples/vhost/virtio_net.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/examples/vhost/virtio_net.c b/examples/vhost/virtio_net.c
index 64bf3d19ff..9064fc3a82 100644
--- a/examples/vhost/virtio_net.c
+++ b/examples/vhost/virtio_net.c
@@ -23,6 +23,7 @@ vs_vhost_net_setup(struct vhost_dev *dev)
uint16_t i;
int vid = dev->vid;
struct vhost_queue *queue;
+   int ret;
 
RTE_LOG(INFO, VHOST_CONFIG,
"setting builtin vhost-user net driver\n");
@@ -33,7 +34,12 @@ vs_vhost_net_setup(struct vhost_dev *dev)
else
dev->hdr_len = sizeof(struct virtio_net_hdr);
 
-   rte_vhost_get_mem_table(vid, &dev->mem);
+   ret = rte_vhost_get_mem_table(vid, &dev->mem);
+   if (ret < 0) {
+   RTE_LOG(ERR, VHOST_CONFIG, "Failed to get "
+   "VM memory layout for device(%d)\n", vid);
+   return;
+   }
 
dev->nr_vrings = rte_vhost_get_vring_num(vid);
for (i = 0; i < dev->nr_vrings; i++) {
-- 
2.17.1



Re: [PATCH v3 4/5] vhost: improve fdset initialization

2024-04-26 Thread Chenbo Xia
Hi Maxime,

> On Apr 9, 2024, at 19:48, Maxime Coquelin  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This patch heavily reworks fdset initialization:
> - fdsets are now dynamically allocated by the FD manager
> - the event dispatcher is now created by the FD manager
> - struct fdset is now opaque to VDUSE and Vhost
> 
> Signed-off-by: Maxime Coquelin 
> ---
> lib/vhost/fd_man.c | 177 -
> lib/vhost/fd_man.h |  39 +-
> lib/vhost/socket.c |  24 ++
> lib/vhost/vduse.c  |  29 ++--
> 4 files changed, 177 insertions(+), 92 deletions(-)
> 
> diff --git a/lib/vhost/fd_man.c b/lib/vhost/fd_man.c
> index 0ae481b785..8b47c97d45 100644
> --- a/lib/vhost/fd_man.c
> +++ b/lib/vhost/fd_man.c
> @@ -3,12 +3,16 @@
>  */
> 
> #include 
> +#include 
> #include 
> #include 
> #include 
> 
> #include 
> #include 
> +#include 
> +#include 
> +#include 
> 
> #include "fd_man.h"
> 
> @@ -19,6 +23,79 @@ RTE_LOG_REGISTER_SUFFIX(vhost_fdset_logtype, fdset, INFO);
> 
> #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
> 
> +struct fdentry {
> +   int fd; /* -1 indicates this entry is empty */
> +   fd_cb rcb;  /* callback when this fd is readable. */
> +   fd_cb wcb;  /* callback when this fd is writeable.*/
> +   void *dat;  /* fd context */
> +   int busy;   /* whether this entry is being used in cb. */
> +};
> +
> +struct fdset {
> +   char name[RTE_THREAD_NAME_SIZE];
> +   struct pollfd rwfds[MAX_FDS];
> +   struct fdentry fd[MAX_FDS];
> +   rte_thread_t tid;
> +   pthread_mutex_t fd_mutex;
> +   pthread_mutex_t fd_polling_mutex;
> +   int num;/* current fd number of this fdset */
> +
> +   union pipefds {
> +   struct {
> +   int pipefd[2];
> +   };
> +   struct {
> +   int readfd;
> +   int writefd;
> +   };
> +   } u;
> +
> +   pthread_mutex_t sync_mutex;
> +   pthread_cond_t sync_cond;
> +   bool sync;
> +   bool destroy;
> +};
> +
> +static int fdset_add_no_sync(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb 
> wcb, void *dat);
> +static uint32_t fdset_event_dispatch(void *arg);
> +
> +#define MAX_FDSETS 8
> +
> +static struct fdset *fdsets[MAX_FDSETS];
> +pthread_mutex_t fdsets_mutex = PTHREAD_MUTEX_INITIALIZER;

Static pthread_mutex_t ?

Thanks,
Chenbo

Re: [PATCH v3 5/5] vhost: manage FD with epoll

2024-04-27 Thread Chenbo Xia
Hi Maxime,

> On Apr 9, 2024, at 19:48, Maxime Coquelin  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> From: David Marchand 
> 
> Switch to epoll so that the concern over the poll() fd array
> is removed.
> Add a simple list of used entries and track the next free entry.
> 
> epoll() is thread safe, we no more need a synchronization
> mechanism and so can remove the notification pipe.
> 
> Signed-off-by: David Marchand 
> Signed-off-by: Maxime Coquelin 
> ---
> lib/vhost/fd_man.c | 399 -
> lib/vhost/fd_man.h |   5 +-
> 2 files changed, 106 insertions(+), 298 deletions(-)
> 
> diff --git a/lib/vhost/fd_man.c b/lib/vhost/fd_man.c
> index 8b47c97d45..a4a2965da1 100644
> --- a/lib/vhost/fd_man.c
> +++ b/lib/vhost/fd_man.c
> @@ -3,9 +3,9 @@
>  */
> 
> #include 
> -#include 
> #include 
> #include 
> +#include 
> #include 
> 
> #include 
> @@ -21,49 +21,34 @@ RTE_LOG_REGISTER_SUFFIX(vhost_fdset_logtype, fdset, INFO);
> #define VHOST_FDMAN_LOG(level, ...) \
>RTE_LOG_LINE(level, VHOST_FDMAN, "" __VA_ARGS__)
> 
> -#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
> -
> struct fdentry {
>int fd; /* -1 indicates this entry is empty */
>fd_cb rcb;  /* callback when this fd is readable. */
>fd_cb wcb;  /* callback when this fd is writeable.*/
>void *dat;  /* fd context */
>int busy;   /* whether this entry is being used in cb. */
> +   LIST_ENTRY(fdentry) next;
> };
> 
> struct fdset {
>char name[RTE_THREAD_NAME_SIZE];
> -   struct pollfd rwfds[MAX_FDS];
> +   int epfd;
>struct fdentry fd[MAX_FDS];
> +   LIST_HEAD(, fdentry) fdlist;
> +   int next_free_idx;
>rte_thread_t tid;
>pthread_mutex_t fd_mutex;
> -   pthread_mutex_t fd_polling_mutex;
> -   int num;/* current fd number of this fdset */
> -
> -   union pipefds {
> -   struct {
> -   int pipefd[2];
> -   };
> -   struct {
> -   int readfd;
> -   int writefd;
> -   };
> -   } u;
> -
> -   pthread_mutex_t sync_mutex;
> -   pthread_cond_t sync_cond;
> -   bool sync;
> +

Not sure this blank line is intended or not :)

>bool destroy;
> };
> 
> -static int fdset_add_no_sync(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb 
> wcb, void *dat);
> -static uint32_t fdset_event_dispatch(void *arg);
> -
> #define MAX_FDSETS 8
> 
> static struct fdset *fdsets[MAX_FDSETS];
> pthread_mutex_t fdsets_mutex = PTHREAD_MUTEX_INITIALIZER;
> 
> +static uint32_t fdset_event_dispatch(void *arg);
> +
> static struct fdset *
> fdset_lookup(const char *name)
> {
> @@ -96,166 +81,6 @@ fdset_insert(struct fdset *fdset)
>return -1;
> }
> 
> -static void
> -fdset_pipe_read_cb(int readfd, void *dat,
> -  int *remove __rte_unused)
> -{
> -   char charbuf[16];
> -   struct fdset *fdset = dat;
> -   int r = read(readfd, charbuf, sizeof(charbuf));
> -   /*
> -* Just an optimization, we don't care if read() failed
> -* so ignore explicitly its return value to make the
> -* compiler happy
> -*/
> -   RTE_SET_USED(r);
> -
> -   pthread_mutex_lock(&fdset->sync_mutex);
> -   fdset->sync = true;
> -   pthread_cond_broadcast(&fdset->sync_cond);
> -   pthread_mutex_unlock(&fdset->sync_mutex);
> -}
> -
> -static void
> -fdset_pipe_uninit(struct fdset *fdset)
> -{
> -   fdset_del(fdset, fdset->u.readfd);
> -   close(fdset->u.readfd);
> -   fdset->u.readfd = -1;
> -   close(fdset->u.writefd);
> -   fdset->u.writefd = -1;
> -}
> -
> -static int
> -fdset_pipe_init(struct fdset *fdset)
> -{
> -   int ret;
> -
> -   pthread_mutex_init(&fdset->sync_mutex, NULL);
> -   pthread_cond_init(&fdset->sync_cond, NULL);
> -
> -   if (pipe(fdset->u.pipefd) < 0) {
> -   VHOST_FDMAN_LOG(ERR,
> -   "failed to create pipe for vhost fdset");
> -   return -1;
> -   }
> -
> -   ret = fdset_add_no_sync(fdset, fdset->u.readfd,
> -   fdset_pipe_read_cb, NULL, fdset);
> -   if (ret < 0) {
> -   VHOST_FDMAN_LOG(ERR,
> -   "failed to add pipe readfd %d into vhost server 
> fdset",
> -   fdset->u.readfd);
> -
> -   fdset_pipe_uninit(fdset);
> -   return -1;
> -   }
> -
> -   return 0;
> -}
> -
> -static void
> -fdset_sync(struct fdset *fdset)
> -{
> -   int ret;
> -
> -   pthread_mutex_lock(&fdset->sync_mutex);
> -
> -   fdset->sync = false;
> -   ret = write(fdset->u.writefd, "1", 1);
> -   if (ret < 0) {
> -   VHOST_FDMAN_LOG(ERR,
> -   "Failed to write to notification pipe: %s",
> -   strerror(errno));
> -   goto out_unlock;
> - 

Question about why doing VFIO RESET after device open

2024-05-06 Thread Chenbo Xia
Hi,

I am not sure why we call ioctl(vfio_dev_fd, VFIO_DEVICE_RESET) in
pci_rte_vfio_setup_device(), which runs after the VFIO device fd has been
opened. As far as I can see in the Linux kernel, opening the VFIO device fd
already triggers a device reset and puts the device into a clean state.
Another VFIO_DEVICE_RESET seems not to be needed?

Could anyone help confirm or correct me?

There was also a related question in community two years ago:
https://mails.dpdk.org/archives/dev/2022-September/251640.html


Thanks,
Chenbo




Re: [PATCH 5/6] bus/pci: use a dynamic logtype

2024-06-25 Thread Chenbo Xia
On Jun 25, 2024, at 20:24, David Marchand  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> Register a logtype for this bus driver and stop logging as EAL.
> 
> Signed-off-by: David Marchand 
> ---
> drivers/bus/pci/bsd/pci.c|  44 +++
> drivers/bus/pci/linux/pci.c  |  52 
> drivers/bus/pci/linux/pci_uio.c  |  75 +---
> drivers/bus/pci/linux/pci_vfio.c | 173 +++
> drivers/bus/pci/pci_common.c |  85 ++---
> drivers/bus/pci/pci_common_uio.c |  15 +--
> drivers/bus/pci/pci_params.c |   9 +-
> drivers/bus/pci/private.h|   6 +
> drivers/bus/pci/windows/pci.c|  13 +-
> drivers/bus/pci/windows/pci_netuio.c |   7 +-
> 10 files changed, 202 insertions(+), 277 deletions(-)

Reviewed-by: Chenbo Xia 


Re: [PATCH v6 1/2] bus/pci: fix secondary process PCI uio resource map problem

2024-07-04 Thread Chenbo Xia


> On Jul 2, 2024, at 15:40, Chaoyong He  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> From: Zerun Fu 
> 
> For the primary process, the logic loops all BARs and will skip
> the map of BAR with an invalid physical address (0), also will
> assign 'uio_res->nb_maps' with the real mapped BARs number. But
> for the secondary process, instead of loops all BARs, the logic
> using the 'uio_res->nb_map' as index. If the device uses continuous
> BARs there will be no problem, whereas if it uses discrete BARs,
> it will lead to mapping errors.
> 
> Fix this problem by also loops all BARs and skip the map of BAR
> with an invalid physical address in secondary process.
> 
> Fixes: 9b957f378abf ("pci: merge uio functions for linux and bsd")
> Cc: muk...@igel.co.jp
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Zerun Fu 
> Reviewed-by: Chaoyong He 
> Reviewed-by: Long Wu 
> Reviewed-by: Peng Zhang 
> Acked-by: Anatoly Burakov 
> ---
> drivers/bus/pci/pci_common_uio.c | 16 +---
> 1 file changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/bus/pci/pci_common_uio.c 
> b/drivers/bus/pci/pci_common_uio.c
> index 76c661f054..f44ccdf27c 100644
> --- a/drivers/bus/pci/pci_common_uio.c
> +++ b/drivers/bus/pci/pci_common_uio.c
> @@ -26,7 +26,7 @@ EAL_REGISTER_TAILQ(rte_uio_tailq)
> static int
> pci_uio_map_secondary(struct rte_pci_device *dev)
> {
> -   int fd, i, j;
> +   int fd, i = 0, j, res_idx;
>struct mapped_pci_resource *uio_res;
>struct mapped_pci_res_list *uio_res_list =
>RTE_TAILQ_CAST(rte_uio_tailq.head, 
> mapped_pci_res_list);
> @@ -37,7 +37,15 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>if (rte_pci_addr_cmp(&uio_res->pci_addr, &dev->addr))
>continue;
> 
> -   for (i = 0; i != uio_res->nb_maps; i++) {
> +   /* Map all BARs */
> +   for (res_idx = 0; res_idx != PCI_MAX_RESOURCE; res_idx++) {
> +   /* skip empty BAR */
> +   if (dev->mem_resource[res_idx].phys_addr == 0)
> +   continue;
> +
> +   if (i >= uio_res->nb_maps)
> +   return -1;
> +
>/*
> * open devname, to mmap it
> */
> @@ -71,7 +79,9 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>}
>return -1;
>    }
> -   dev->mem_resource[i].addr = mapaddr;
> +   dev->mem_resource[res_idx].addr = mapaddr;
> +
> +   i++;
>}
>return 0;
>}
> —
> 2.39.1
> 

Reviewed-by: Chenbo Xia 





Re: [PATCH] vhost: fix offset while mmaping log base address

2024-07-09 Thread Chenbo Xia


> On Jul 8, 2024, at 14:57, BillXiang  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> From: BillXiang 
> 
> For sanity the offset should be the last parameter of mmap.
> 
> Fixes: fbc4d24 ("vhost: fix offset while mmaping log base address")

The commit id length should be 12.

Since QEMU always sends offset 0, I think there is no need to backport the
patch, so omitting Cc: stable is fine.

With above fixed:

Reviewed-by: Chenbo Xia 

> Signed-off-by: BillXiang 
> ---
> lib/vhost/vhost_user.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 5f470da38a..0893ae80bb 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -2399,7 +2399,7 @@ vhost_user_set_log_base(struct virtio_net **pdev,
> * mmap from 0 to workaround a hugepage mmap bug: mmap will
> * fail when offset is not page size aligned.
> */
> -   addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> +   addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, off);
>alignment = get_blk_size(fd);
>close(fd);
>if (addr == MAP_FAILED) {
> --
> 2.30.0



Re: [PATCH v6 1/2] bus/pci: add function to enable/disable PASID

2023-11-05 Thread Chenbo Xia
Sorry I missed all previous versions…

+ARM guy

> On Nov 4, 2023, at 02:29, Abdullah Sevincer  
> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This commit implements an internal api to enable and disable PASID for
> a device e.g. device driver event/dlb2.
> 
> For kernels when PASID enabled by default it breaks DLB functionality,
> hence disabling PASID is required for DLB to function properly.
> 
> PASID capability is not exposed to users hence offset can not be
> retrieved by rte_pci_find_ext_capability() api. Therefore, api
> implemented in this commit accepts an offset for PASID with an enable
> flag which is used to enable/disable PASID.
> 
> Signed-off-by: Abdullah Sevincer 

Is PASID now part of the PCIe spec? Should this API work for both x86 and Arm?
I am not sure Arm is OK with the naming; previously they referred to it as
Sub Stream ID (SSID).

> ---
> drivers/bus/pci/pci_common.c  |  7 +++
> drivers/bus/pci/rte_bus_pci.h | 13 +
> drivers/bus/pci/version.map   |  1 +
> lib/pci/rte_pci.h |  4 
> 4 files changed, 25 insertions(+)
> 
> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
> index 921d957bf6..5aac2406f1 100644
> --- a/drivers/bus/pci/pci_common.c
> +++ b/drivers/bus/pci/pci_common.c
> @@ -938,6 +938,13 @@ rte_pci_set_bus_master(const struct rte_pci_device *dev, 
> bool enable)
>return 0;
> }
> 
> +int
> +rte_pci_pasid_ena_dis(const struct rte_pci_device *dev, off_t offset, bool 
> enable)
> +{
> +   uint16_t pasid = enable;
> +   return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0 ? 
> -1 : 0;
> +}
> +
> struct rte_pci_bus rte_pci_bus = {
>.bus = {
>.scan = rte_pci_scan,
> diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
> index 21e234abf0..d45b7bf2ab 100644
> --- a/drivers/bus/pci/rte_bus_pci.h
> +++ b/drivers/bus/pci/rte_bus_pci.h
> @@ -295,6 +295,19 @@ void rte_pci_ioport_read(struct rte_pci_ioport *p,
> void rte_pci_ioport_write(struct rte_pci_ioport *p,
>const void *data, size_t len, off_t offset);
> 
> +/**
> + * Enable/Disable PASID.
> + *
> + * @param dev
> + *   A pointer to a rte_pci_device structure.
> + * @param offset
> + *   Offset of the PASID external capability.
> + * @param enable
> + *   Flag to enable or disable PASID.
> + */
> +__rte_internal
> +int rte_pci_pasid_ena_dis(const struct rte_pci_device *dev, off_t offset, 
> bool enable);
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/drivers/bus/pci/version.map b/drivers/bus/pci/version.map
> index 74c5b075d5..01e6a09eb6 100644
> --- a/drivers/bus/pci/version.map
> +++ b/drivers/bus/pci/version.map
> @@ -36,6 +36,7 @@ INTERNAL {
>global:
> 
>rte_pci_get_sysfs_path;
> +   rte_pci_pasid_ena_dis;
>rte_pci_register;
>rte_pci_unregister;
> };
> diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
> index 69e932d910..d195f01950 100644
> --- a/lib/pci/rte_pci.h
> +++ b/lib/pci/rte_pci.h
> @@ -101,6 +101,10 @@ extern "C" {
> #define RTE_PCI_EXT_CAP_ID_ACS 0x0d/* Access Control Services */
> #define RTE_PCI_EXT_CAP_ID_SRIOV   0x10/* SR-IOV */
> #define RTE_PCI_EXT_CAP_ID_PRI 0x13/* Page Request Interface */
> +#define RTE_PCI_EXT_CAP_ID_PASID0x1B/* Process Address Space ID 
> */
> +
> +/* Process Address Space ID */
> +#define RTE_PCI_PASID_CTRL 0x06/* PASID control register */

Aligning with the existing definitions would look better. Use a TAB?

Thanks,
Chenbo

> 
> /* Advanced Error Reporting (RTE_PCI_EXT_CAP_ID_ERR) */
> #define RTE_PCI_ERR_UNCOR_STATUS   0x04/* Uncorrectable Error Status 
> */
> --
> 2.25.1
> 



Re: [PATCH v1] bus/pci: revise support PASID control

2023-11-14 Thread Chenbo Xia
+Nipun

Please cc me and Nipun if there is a new version.

> On Nov 14, 2023, at 01:27, Abdullah Sevincer  
> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This commit revises PASID control function to accept PASID offset to
> pasid *structure* instead of taking exact register for controlling the
> feature.
> 
> PASID control function was introduced in earlier commit.
> Pls see commit 5a6878335b81 ("event/dlb2: disable PASID") and
> commit 60ea19609aec ("bus/pci: add PASID control").

Pls -> Please

> 
> Signed-off-by: Abdullah Sevincer 
> ---
> drivers/bus/pci/pci_common.c  | 5 ++---
> drivers/bus/pci/rte_bus_pci.h | 5 -
> drivers/event/dlb2/pf/dlb2_main.c | 4 ++--
> lib/pci/rte_pci.h | 2 +-
> 4 files changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
> index ba5e280d33..dbe647d15d 100644
> --- a/drivers/bus/pci/pci_common.c
> +++ b/drivers/bus/pci/pci_common.c
> @@ -943,9 +943,8 @@ rte_pci_pasid_set_state(const struct rte_pci_device *dev,
>off_t offset, bool enable)
> {
>uint16_t pasid = enable;
> -   return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0
> -   ? -1
> -   : 0;
> +   return rte_pci_write_config(dev, &pasid, sizeof(pasid),
> +   offset + RTE_PCI_PASID_CTRL) < 0 ? -1 : 0;
> }

It would be better to compare the return value of rte_pci_write_config with
sizeof(pasid). Consider the case where the user specifies a wrong offset and
rte_pci_write_config returns a value smaller than sizeof(pasid): it would be
treated as success although it is actually wrong.


> 
> struct rte_pci_bus rte_pci_bus = {
> diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
> index f07bf9b588..35d07d8294 100644
> --- a/drivers/bus/pci/rte_bus_pci.h
> +++ b/drivers/bus/pci/rte_bus_pci.h
> @@ -161,9 +161,12 @@ int rte_pci_set_bus_master(const struct rte_pci_device 
> *dev, bool enable);
>  * @param dev
>  *   A pointer to a rte_pci_device structure.
>  * @param offset
> - *   Offset of the PASID external capability.
> + *   Offset of the PASID external capability structure.
>  * @param enable
>  *   Flag to enable or disable PASID.
> + *
> + * @return
> + * 0 on success, -1 on error in PCI config space read/write.
>  */
> __rte_internal
> int rte_pci_pasid_set_state(const struct rte_pci_device *dev,
> diff --git a/drivers/event/dlb2/pf/dlb2_main.c 
> b/drivers/event/dlb2/pf/dlb2_main.c
> index 61a7b39eef..a95d3227a4 100644
> --- a/drivers/event/dlb2/pf/dlb2_main.c
> +++ b/drivers/event/dlb2/pf/dlb2_main.c
> @@ -518,8 +518,8 @@ dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
>/* Disable PASID if it is enabled by default, which
> * breaks the DLB if enabled.
> */
> -   off = DLB2_PCI_PASID_CAP_OFFSET + RTE_PCI_PASID_CTRL;
> -   if (rte_pci_pasid_set_state(pdev, off, false)) {
> +   off = DLB2_PCI_PASID_CAP_OFFSET;
> +   if (rte_pci_pasid_set_state(pdev, off, false) < 0) {


I don’t know the details, so does this mean that different devices that
support PASID have different offsets?

Btw, is this capability still not exposed to user space in the latest kernel?

Thanks,
Chenbo 

>DLB2_LOG_ERR("[%s()] failed to write the pcie config space at 
> offset %d\n",
>__func__, (int)off);
>return -1;
> diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
> index 0d2d8d8fed..c26fc77209 100644
> --- a/lib/pci/rte_pci.h
> +++ b/lib/pci/rte_pci.h
> @@ -101,7 +101,7 @@ extern "C" {
> #define RTE_PCI_EXT_CAP_ID_ACS 0x0d/* Access Control Services */
> #define RTE_PCI_EXT_CAP_ID_SRIOV   0x10/* SR-IOV */
> #define RTE_PCI_EXT_CAP_ID_PRI 0x13/* Page Request Interface */
> -#define RTE_PCI_EXT_CAP_ID_PASID   0x1B/* Process Address Space ID */
> +#define RTE_PCI_EXT_CAP_ID_PASID   0x1b/* Process Address Space ID */
> 
> /* Advanced Error Reporting (RTE_PCI_EXT_CAP_ID_ERR) */
> #define RTE_PCI_ERR_UNCOR_STATUS   0x04/* Uncorrectable Error Status 
> */
> --
> 2.25.1
> 



Re: [PATCH v1] bus/pci: revise support PASID control

2023-11-14 Thread Chenbo Xia
On Nov 15, 2023, at 01:39, Sevincer, Abdullah  
wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
>> +I don’t know about the details, so it means for different devices that 
>> support PASID, they have different offsets?
> 
>> +Btw, Is this cap still not exposed to user space in latest kernel?
> 
> Yes, may be different offsets for different devices.

But why? Isn’t it a standard capability? In my understanding, a standard
capability should have the same offset definition for all devices.

> As of now it is not exposed to user. Bruce's test was on 6.2 generic kernel 
> (6.2.0-36-generic)

Will kernel plan to support that? I can see the related work was done by Intel 
but somehow it’s
not merged into kernel. Could you give more information on this?

If kernel does not want this to be exposed, it means userspace should not 
access this. No?

/Chenbo 



Re: [PATCH] vfio: add get device info API

2023-11-14 Thread Chenbo Xia


> On Nov 14, 2023, at 18:23, Mingjin Ye  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This patch adds an API to support getting device information.
> 
> The driver can use the "rte_vfio_get_device_info" helper to get
> device information from EAL.
> 
> Cc: sta...@dpdk.org

No stable

Please explain why this api is needed in your use case. Taking this with
the use-case patch could be more clear.

Thanks,
Chenbo

> 
> Signed-off-by: Mingjin Ye 
> ---
> lib/eal/include/rte_vfio.h | 26 ++
> lib/eal/linux/eal_vfio.c   | 19 +++
> lib/eal/version.map|  3 +++
> 3 files changed, 48 insertions(+)



Re: [PATCH v1] bus/pci: revise support PASID control

2023-11-16 Thread Chenbo Xia
On Nov 17, 2023, at 01:43, Chen, Mike Ximing  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
>> -Original Message-----
>> From: Chenbo Xia 
>> Sent: Tuesday, November 14, 2023 8:54 PM
>> To: Sevincer, Abdullah 
>> Cc: dev@dpdk.org; jer...@marvell.com; Chen, Mike Ximing
>> ; Richardson, Bruce
>> ; NBU-Contact-Thomas Monjalon (EXTERNAL)
>> ; Marchand, David ;
>> nipun.gu...@amd.com
>> Subject: Re: [PATCH v1] bus/pci: revise support PASID control
>> 
>> On Nov 15, 2023, at 01:39, Sevincer, Abdullah 
>> wrote:
>>> 
>>> External email: Use caution opening links or attachments
>>> 
>>> 
>>>> +I don’t know about the details, so it means for different devices that 
>>>> support
>> PASID, they have different offsets?
>>> 
>>>> +Btw, Is this cap still not exposed to user space in latest kernel?
>>> 
>>> Yes, may be different offsets for different devices.
>> 
>> But why? It’s not standard capability? In my understanding, standard cap 
>> should
>> have the same offset definitions for all devices.
> 
> PASID is a part of extended capabilities. Its offset can be different for 
> different devices.
> 
>> 
>>> As of now it is not exposed to user. Bruce's test was on 6.2 generic
>>> kernel (6.2.0-36-generic)
>> 
>> Will kernel plan to support that? I can see the related work was done by 
>> Intel but
>> somehow it’s not merged into kernel. Could you give more information on this?
>> 
> Hi Chenbo,
> As you may know there has been a lot of changes in iommu/vfio/SVA/pasid/SIOV
> support in Linux kernel recently. The PASID used to be disabled, but starting 
> with
> kernel 6.2 it is enabled in vfio-pci driver by default.  We did contact the 
> kernel developers
> on this issue. They seem to insist that enabling PASID is needed for whatever 
> new features
> they are developing. This breaks the DLB PF PMD as DLB HW requires the PASID 
> to be
> disabled for PF to operate properly (otherwise the HW puts DLB in a different 
> mode). We
> will continue to talk to the kernel developers on this issue, but in the 
> meantime would like
> to provide this patch so that DPDK PF PMD can still work with latest kernels.
> 
> In terms of exposing the PASID capability to user space, we are aware of 
> some patches
> submitted in conjunction with the changes mentioned above, for example,
> https://lkml.iu.edu/hypermail/linux/kernel/2309.3/02380.html
> But we don’t know when and if it will be accepted into the kernel. Hopefully 
> the patch will
> be accepted so we don’t have to use the hard coded offset.
> 
>> If kernel does not want this to be exposed, it means userspace should not 
>> access
>> this. No?
>> 
> The action (disabling PASID) only applies to the targeted device. In the DLB PF 
> PMD case,
> the DPDK has full control of the device via vfio-pci.  It does not affect 
> kernel and any
> other device's operation.

Thanks for the long explanation.

Hope to see a follow-up in DPDK when this capability gets exposed to user space later :)

/Chenbo

> 
> Thanks
> Mike
>> /Chenbo
> 



Re: [PATCH v2] bus/pci: revise support PASID control

2023-11-16 Thread Chenbo Xia


> On Nov 15, 2023, at 01:36, Abdullah Sevincer  
> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This commit revises PASID control function to accept PASID offset to
> pasid *structure* instead of taking exact register for controlling the
> feature.
> 
> PASID control function was introduced in earlier commit.
> Please see commit 5a6878335b81 ("event/dlb2: disable PASID") and
> commit 60ea19609aec ("bus/pci: add PASID control").
> 
> Signed-off-by: Abdullah Sevincer 
> ---
> drivers/bus/pci/pci_common.c  | 5 ++---
> drivers/bus/pci/rte_bus_pci.h | 5 -
> drivers/event/dlb2/pf/dlb2_main.c | 4 ++--
> lib/pci/rte_pci.h | 2 +-
> 4 files changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
> index ba5e280d33..889a48d2af 100644
> --- a/drivers/bus/pci/pci_common.c
> +++ b/drivers/bus/pci/pci_common.c
> @@ -943,9 +943,8 @@ rte_pci_pasid_set_state(const struct rte_pci_device *dev,
>off_t offset, bool enable)
> {
>uint16_t pasid = enable;
> -   return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0
> -   ? -1
> -   : 0;
> +   return rte_pci_write_config(dev, &pasid, sizeof(pasid),
> +   offset + RTE_PCI_PASID_CTRL) != sizeof(pasid) ? -1 : 
> 0;
> }
> 
> struct rte_pci_bus rte_pci_bus = {
> diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
> index f07bf9b588..35d07d8294 100644
> --- a/drivers/bus/pci/rte_bus_pci.h
> +++ b/drivers/bus/pci/rte_bus_pci.h
> @@ -161,9 +161,12 @@ int rte_pci_set_bus_master(const struct rte_pci_device 
> *dev, bool enable);
>  * @param dev
>  *   A pointer to a rte_pci_device structure.
>  * @param offset
> - *   Offset of the PASID external capability.
> + *   Offset of the PASID external capability structure.
>  * @param enable
>  *   Flag to enable or disable PASID.
> + *
> + * @return
> + * 0 on success, -1 on error in PCI config space read/write.
>  */
> __rte_internal
> int rte_pci_pasid_set_state(const struct rte_pci_device *dev,
> diff --git a/drivers/event/dlb2/pf/dlb2_main.c 
> b/drivers/event/dlb2/pf/dlb2_main.c
> index 61a7b39eef..a95d3227a4 100644
> --- a/drivers/event/dlb2/pf/dlb2_main.c
> +++ b/drivers/event/dlb2/pf/dlb2_main.c
> @@ -518,8 +518,8 @@ dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
>/* Disable PASID if it is enabled by default, which
> * breaks the DLB if enabled.
> */
> -   off = DLB2_PCI_PASID_CAP_OFFSET + RTE_PCI_PASID_CTRL;
> -   if (rte_pci_pasid_set_state(pdev, off, false)) {
> +   off = DLB2_PCI_PASID_CAP_OFFSET;
> +   if (rte_pci_pasid_set_state(pdev, off, false) < 0) {
>DLB2_LOG_ERR("[%s()] failed to write the pcie config space at 
> offset %d\n",
>__func__, (int)off);
>return -1;
> diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
> index 0d2d8d8fed..c26fc77209 100644
> --- a/lib/pci/rte_pci.h
> +++ b/lib/pci/rte_pci.h
> @@ -101,7 +101,7 @@ extern "C" {
> #define RTE_PCI_EXT_CAP_ID_ACS 0x0d/* Access Control Services */
> #define RTE_PCI_EXT_CAP_ID_SRIOV   0x10/* SR-IOV */
> #define RTE_PCI_EXT_CAP_ID_PRI 0x13/* Page Request Interface */
> -#define RTE_PCI_EXT_CAP_ID_PASID   0x1B/* Process Address Space ID */
> +#define RTE_PCI_EXT_CAP_ID_PASID   0x1b/* Process Address Space ID */
> 
> /* Advanced Error Reporting (RTE_PCI_EXT_CAP_ID_ERR) */
> #define RTE_PCI_ERR_UNCOR_STATUS   0x04/* Uncorrectable Error Status 
> */
> --
> 2.25.1
> 

Reviewed-by: Chenbo Xia 



Re: [PATCH v7 1/2] vfio: add get device info API

2023-11-28 Thread Chenbo Xia
On Nov 22, 2023, at 18:22, Mingjin Ye  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This patch adds an API to support getting device information.
> 
> The driver can use the "rte_vfio_get_device_info" helper to get
> device information from EAL.
> 
> Signed-off-by: Mingjin Ye 
> ---
> lib/eal/include/rte_vfio.h | 29 +
> lib/eal/linux/eal_vfio.c   | 27 +++
> lib/eal/version.map|  3 +++
> 3 files changed, 59 insertions(+)
> 

Comments not addressed:

https://mails.dpdk.org/archives/dev/2023-November/282200.html

/Chenbo


[RFC 0/4] Support VFIO sparse mmap in PCI bus

2023-04-17 Thread Chenbo Xia
This series introduces support for a standard VFIO capability, called
sparse mmap, to the PCI bus. In the Linux kernel, it is defined as
VFIO_REGION_INFO_CAP_SPARSE_MMAP. Sparse mmap means that, instead of
mmap-ing the whole BAR region into the DPDK process, only part of the
BAR region is mmap-ed, based on the sparse mmap information obtained
from the kernel. For the rest of the BAR region that is not mmap-ed,
the DPDK process can use the pread/pwrite system calls for access.
Sparse mmap is useful when the kernel does not want userspace to mmap
the whole BAR region, or when the kernel wants to control access to a
specific part of the BAR region. Vendors can choose whether to enable
this feature for their devices in their specific kernel modules.

In this patchset:

Patch 1-3 is mainly for introducing BAR access APIs so that
driver could use them to access specific BAR using pread/pwrite
system calls when part of the BAR is not mmap-able.

Patch 4 adds the VFIO sparse mmap support finally. A question
is for all sparse mmap regions, should they be mapped to a
continuous virtual address region that follows device-specific
BAR layout or not. In theory, there could be three options to
support this feature.

Option 1: Map sparse mmap regions independently
==
In this approach, we mmap each sparse mmap region one by one
and each region could be located anywhere in process address
space. But accessing the mmaped BAR will not be as easy as
'bar_base_address + bar_offset', driver needs to check the
sparse mmap information to access specific BAR register. 

Patch 4 in this patchset adopts this option. Driver API change
is introduced in bus_pci_driver.h. Corresponding changes in
all drivers are also done and currently I am assuming drivers
do not support this feature so they will not check the
'is_sparse' flag but assume it to be false. Note that it will
not break any driver and each vendor can add related logic when
they start to support this feature. This is only because I don't
want to introduce complexity to drivers that do not want to
support this feature.

Option 2: Map sparse mmap regions based on device-specific BAR layout 
==
In this approach, the sparse mmap regions are mapped to continuous
virtual address region that follows device-specific BAR layout.
For example, the BAR size is 0x4000 and only 0-0x1000 (sparse mmap
region #1) and 0x3000-0x4000 (sparse mmap region #2) could be
mmaped. Region #1 will be mapped at 'base_addr' and region #2
will be mapped at 'base_addr + 0x3000'. The good thing is if
we implement like this, driver can still access all BAR registers
using 'bar_base_address + bar_offset' way and we don't need
to introduce any driver API change. But the address space
range 'base_addr + 0x1000' to 'base_addr + 0x3000' may need to
be reserved so it could result in waste of address space or memory
(when we use MAP_ANONYMOUS and MAP_PRIVATE flag to reserve this
range). Meanwhile, driver needs to know which part of BAR is
mmaped (this is possible since the range is defined by vendor's
specific kernel module).

Option 3: Support both option 1 & 2 
===
We could define a driver flag to let the driver choose which way it
prefers, since each option has its own pros and cons.

Please share your comments, Thanks!


Chenbo Xia (4):
  bus/pci: introduce an internal representation of PCI device
  bus/pci: avoid depending on private value in kernel source
  bus/pci: introduce helper for MMIO read and write
  bus/pci: add VFIO sparse mmap support

 drivers/baseband/acc/rte_acc100_pmd.c |   6 +-
 drivers/baseband/acc/rte_vrb_pmd.c|   6 +-
 .../fpga_5gnr_fec/rte_fpga_5gnr_fec.c |   6 +-
 drivers/baseband/fpga_lte_fec/fpga_lte_fec.c  |   6 +-
 drivers/bus/pci/bsd/pci.c |  43 +-
 drivers/bus/pci/bus_pci_driver.h  |  24 +-
 drivers/bus/pci/linux/pci.c   |  91 +++-
 drivers/bus/pci/linux/pci_init.h  |  14 +-
 drivers/bus/pci/linux/pci_uio.c   |  34 +-
 drivers/bus/pci/linux/pci_vfio.c  | 445 ++
 drivers/bus/pci/pci_common.c  |  57 ++-
 drivers/bus/pci/pci_common_uio.c  |  12 +-
 drivers/bus/pci/private.h |  25 +-
 drivers/bus/pci/rte_bus_pci.h |  48 ++
 drivers/bus/pci/version.map   |   3 +
 drivers/common/cnxk/roc_dev.c |   4 +-
 drivers/common/cnxk/roc_dpi.c |   2 +-
 drivers/common/cnxk/roc_ml.c  |  22 +-
 drivers/common/qat/dev/qat_dev_gen1.c |   2 +-
 drivers/common/qat/dev/qat_dev_gen4.c |   4 +-
 drivers/common/sfc_efx/sfc_efx.c  |   2 +-
 drivers/compress/octeontx/otx_zip.c   |   4 +-
 drivers/crypto/ccp/ccp_dev.c  |   4 +-
 drivers/crypto/cnxk/cnxk_cryptodev_op

[RFC 1/4] bus/pci: introduce an internal representation of PCI device

2023-04-17 Thread Chenbo Xia
This patch introduces an internal representation of the PCI device
which will be used to store the internal information that doesn't have
to be exposed to drivers, e.g., the VFIO region sizes/offsets.

In this patch, the internal structure is simply a wrapper of the
rte_pci_device structure. More fields will be added.

Signed-off-by: Chenbo Xia 
---
 drivers/bus/pci/bsd/pci.c| 13 -
 drivers/bus/pci/linux/pci.c  | 28 
 drivers/bus/pci/pci_common.c | 12 ++--
 drivers/bus/pci/private.h| 14 +-
 4 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c
index 7459d15c7e..a747eca58c 100644
--- a/drivers/bus/pci/bsd/pci.c
+++ b/drivers/bus/pci/bsd/pci.c
@@ -208,16 +208,19 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, 
int res_idx,
 static int
 pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
 {
+   struct rte_pci_device_internal *pdev;
struct rte_pci_device *dev;
struct pci_bar_io bar;
unsigned i, max;
 
-   dev = malloc(sizeof(*dev));
-   if (dev == NULL) {
+   pdev = malloc(sizeof(*pdev));
+   if (pdev == NULL) {
+   RTE_LOG(ERR, EAL, "Cannot allocate memory for internal pci 
device\n");
return -1;
}
 
-   memset(dev, 0, sizeof(*dev));
+   memset(pdev, 0, sizeof(*pdev));
+   dev = &pdev->device;
dev->device.bus = &rte_pci_bus.bus;
 
dev->addr.domain = conf->pc_sel.pc_domain;
@@ -303,7 +306,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
memmove(dev2->mem_resource,
dev->mem_resource,
sizeof(dev->mem_resource));
-   pci_free(dev);
+   pci_free(pdev);
}
return 0;
}
@@ -313,7 +316,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
return 0;
 
 skipdev:
-   pci_free(dev);
+   pci_free(pdev);
return 0;
 }
 
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index ebd1395502..4c2c5ba382 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -211,22 +211,26 @@ pci_scan_one(const char *dirname, const struct 
rte_pci_addr *addr)
 {
char filename[PATH_MAX];
unsigned long tmp;
+   struct rte_pci_device_internal *pdev;
struct rte_pci_device *dev;
char driver[PATH_MAX];
int ret;
 
-   dev = malloc(sizeof(*dev));
-   if (dev == NULL)
+   pdev = malloc(sizeof(*pdev));
+   if (pdev == NULL) {
+   RTE_LOG(ERR, EAL, "Cannot allocate memory for internal pci 
device\n");
return -1;
+   }
 
-   memset(dev, 0, sizeof(*dev));
+   memset(pdev, 0, sizeof(*pdev));
+   dev = &pdev->device;
dev->device.bus = &rte_pci_bus.bus;
dev->addr = *addr;
 
/* get vendor id */
snprintf(filename, sizeof(filename), "%s/vendor", dirname);
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-   pci_free(dev);
+   pci_free(pdev);
return -1;
}
dev->id.vendor_id = (uint16_t)tmp;
@@ -234,7 +238,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr 
*addr)
/* get device id */
snprintf(filename, sizeof(filename), "%s/device", dirname);
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-   pci_free(dev);
+   pci_free(pdev);
return -1;
}
dev->id.device_id = (uint16_t)tmp;
@@ -243,7 +247,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr 
*addr)
snprintf(filename, sizeof(filename), "%s/subsystem_vendor",
 dirname);
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-   pci_free(dev);
+   pci_free(pdev);
return -1;
}
dev->id.subsystem_vendor_id = (uint16_t)tmp;
@@ -252,7 +256,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr 
*addr)
snprintf(filename, sizeof(filename), "%s/subsystem_device",
 dirname);
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-   pci_free(dev);
+   pci_free(pdev);
return -1;
}
dev->id.subsystem_device_id = (uint16_t)tmp;
@@ -261,7 +265,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr 
*addr)
snprintf(filename, sizeof(filename), "%s/class",
 dirname);
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-   pci_free(dev);
+   pci_free(pdev);
return -1;

[RFC 2/4] bus/pci: avoid depending on private value in kernel source

2023-04-17 Thread Chenbo Xia
The value 40 used in VFIO_GET_REGION_ADDR() is a private value
(VFIO_PCI_OFFSET_SHIFT) defined in Linux kernel source [1]. It
is not part of VFIO API, and we should not depend on it.

[1] https://github.com/torvalds/linux/blob/v6.2/include/linux/vfio_pci_core.h

Signed-off-by: Chenbo Xia 
---
 drivers/bus/pci/linux/pci.c  |   4 +-
 drivers/bus/pci/linux/pci_init.h |   4 +-
 drivers/bus/pci/linux/pci_vfio.c | 195 +++
 drivers/bus/pci/private.h|   9 ++
 lib/eal/include/rte_vfio.h   |   1 -
 5 files changed, 158 insertions(+), 55 deletions(-)

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 4c2c5ba382..04e21ae20f 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -645,7 +645,7 @@ int rte_pci_read_config(const struct rte_pci_device *device,
return pci_uio_read_config(intr_handle, buf, len, offset);
 #ifdef VFIO_PRESENT
case RTE_PCI_KDRV_VFIO:
-   return pci_vfio_read_config(intr_handle, buf, len, offset);
+   return pci_vfio_read_config(device, buf, len, offset);
 #endif
default:
rte_pci_device_name(&device->addr, devname,
@@ -669,7 +669,7 @@ int rte_pci_write_config(const struct rte_pci_device 
*device,
return pci_uio_write_config(intr_handle, buf, len, offset);
 #ifdef VFIO_PRESENT
case RTE_PCI_KDRV_VFIO:
-   return pci_vfio_write_config(intr_handle, buf, len, offset);
+   return pci_vfio_write_config(device, buf, len, offset);
 #endif
default:
rte_pci_device_name(&device->addr, devname,
diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index dcea726186..9f6659ba6e 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -66,9 +66,9 @@ int pci_uio_ioport_unmap(struct rte_pci_ioport *p);
 #endif
 
 /* access config space */
-int pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+int pci_vfio_read_config(const struct rte_pci_device *dev,
 void *buf, size_t len, off_t offs);
-int pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+int pci_vfio_write_config(const struct rte_pci_device *dev,
  const void *buf, size_t len, off_t offs);
 
 int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index fab3483d9f..1748ad2ae0 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -43,45 +43,82 @@ static struct rte_tailq_elem rte_vfio_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_vfio_tailq)
 
+static int
+pci_vfio_get_region(const struct rte_pci_device *dev, int index,
+   uint64_t *size, uint64_t *offset)
+{
+   const struct rte_pci_device_internal *pdev =
+   RTE_PCI_DEVICE_INTERNAL_CONST(dev);
+
+   if (index >= VFIO_PCI_NUM_REGIONS || index >= RTE_MAX_PCI_REGIONS)
+   return -1;
+
+   if (pdev->region[index].size == 0 && pdev->region[index].offset == 0)
+   return -1;
+
+   *size   = pdev->region[index].size;
+   *offset = pdev->region[index].offset;
+
+   return 0;
+}
+
 int
-pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+pci_vfio_read_config(const struct rte_pci_device *dev,
void *buf, size_t len, off_t offs)
 {
-   int vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
+   uint64_t size, offset;
+   int fd;
 
-   if (vfio_dev_fd < 0)
+   fd = rte_intr_dev_fd_get(dev->intr_handle);
+
+   if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+   &size, &offset) != 0)
+   return -1;
+
+   if ((uint64_t)len + offs > size)
return -1;
 
-   return pread64(vfio_dev_fd, buf, len,
-  VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+   return pread64(fd, buf, len, offset + offs);
 }
 
 int
-pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+pci_vfio_write_config(const struct rte_pci_device *dev,
const void *buf, size_t len, off_t offs)
 {
-   int vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
+   uint64_t size, offset;
+   int fd;
 
-   if (vfio_dev_fd < 0)
+   fd = rte_intr_dev_fd_get(dev->intr_handle);
+
+   if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+   &size, &offset) != 0)
return -1;
 
-   return pwrite64(vfio_dev_fd, buf, len,
-  VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+   if ((uint64_t)len + offs > size)
+   return -1;
+
+   return pwrite64(fd, buf, len, offset + offs);
 }
 
 /* get PCI BAR number where MSI-X interrupts are */
 static int
-pci_vfio_get_

[RFC 3/4] bus/pci: introduce helper for MMIO read and write

2023-04-17 Thread Chenbo Xia
The MMIO regions may not be mmap-able for VFIO-PCI devices.
In this case, the driver should explicitly do read and write
to access these regions.

Signed-off-by: Chenbo Xia 
---
 drivers/bus/pci/bsd/pci.c| 22 +++
 drivers/bus/pci/linux/pci.c  | 46 ++
 drivers/bus/pci/linux/pci_init.h | 10 +++
 drivers/bus/pci/linux/pci_uio.c  | 22 +++
 drivers/bus/pci/linux/pci_vfio.c | 36 
 drivers/bus/pci/rte_bus_pci.h| 48 
 drivers/bus/pci/version.map  |  3 ++
 7 files changed, 187 insertions(+)

diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c
index a747eca58c..27f12590d4 100644
--- a/drivers/bus/pci/bsd/pci.c
+++ b/drivers/bus/pci/bsd/pci.c
@@ -489,6 +489,28 @@ int rte_pci_write_config(const struct rte_pci_device *dev,
return -1;
 }
 
+/* Read PCI MMIO space. */
+int rte_pci_mmio_read(const struct rte_pci_device *dev, int bar,
+ void *buf, size_t len, off_t offset)
+{
+   if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL ||
+   (uint64_t)offset + len > dev->mem_resource[bar].len)
+   return -1;
+   memcpy(buf, (uint8_t *)dev->mem_resource[bar].addr + offset, len);
+   return len;
+}
+
+/* Write PCI MMIO space. */
+int rte_pci_mmio_write(const struct rte_pci_device *dev, int bar,
+  const void *buf, size_t len, off_t offset)
+{
+   if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL ||
+   (uint64_t)offset + len > dev->mem_resource[bar].len)
+   return -1;
+   memcpy((uint8_t *)dev->mem_resource[bar].addr + offset, buf, len);
+   return len;
+}
+
 int
 rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
struct rte_pci_ioport *p)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 04e21ae20f..3d237398d9 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -680,6 +680,52 @@ int rte_pci_write_config(const struct rte_pci_device 
*device,
}
 }
 
+/* Read PCI MMIO space. */
+int rte_pci_mmio_read(const struct rte_pci_device *device, int bar,
+   void *buf, size_t len, off_t offset)
+{
+   char devname[RTE_DEV_NAME_MAX_LEN] = "";
+
+   switch (device->kdrv) {
+   case RTE_PCI_KDRV_IGB_UIO:
+   case RTE_PCI_KDRV_UIO_GENERIC:
+   return pci_uio_mmio_read(device, bar, buf, len, offset);
+#ifdef VFIO_PRESENT
+   case RTE_PCI_KDRV_VFIO:
+   return pci_vfio_mmio_read(device, bar, buf, len, offset);
+#endif
+   default:
+   rte_pci_device_name(&device->addr, devname,
+   RTE_DEV_NAME_MAX_LEN);
+   RTE_LOG(ERR, EAL,
+   "Unknown driver type for %s\n", devname);
+   return -1;
+   }
+}
+
+/* Write PCI MMIO space. */
+int rte_pci_mmio_write(const struct rte_pci_device *device, int bar,
+   const void *buf, size_t len, off_t offset)
+{
+   char devname[RTE_DEV_NAME_MAX_LEN] = "";
+
+   switch (device->kdrv) {
+   case RTE_PCI_KDRV_IGB_UIO:
+   case RTE_PCI_KDRV_UIO_GENERIC:
+   return pci_uio_mmio_write(device, bar, buf, len, offset);
+#ifdef VFIO_PRESENT
+   case RTE_PCI_KDRV_VFIO:
+   return pci_vfio_mmio_write(device, bar, buf, len, offset);
+#endif
+   default:
+   rte_pci_device_name(&device->addr, devname,
+   RTE_DEV_NAME_MAX_LEN);
+   RTE_LOG(ERR, EAL,
+   "Unknown driver type for %s\n", devname);
+   return -1;
+   }
+}
+
 int
 rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
struct rte_pci_ioport *p)
diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index 9f6659ba6e..d842809ccd 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -37,6 +37,11 @@ int pci_uio_read_config(const struct rte_intr_handle 
*intr_handle,
 int pci_uio_write_config(const struct rte_intr_handle *intr_handle,
 const void *buf, size_t len, off_t offs);
 
+int pci_uio_mmio_read(const struct rte_pci_device *dev, int bar,
+   void *buf, size_t len, off_t offset);
+int pci_uio_mmio_write(const struct rte_pci_device *dev, int bar,
+   const void *buf, size_t len, off_t offset);
+
 int pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
   struct rte_pci_ioport *p);
 void pci_uio_ioport_read(struct rte_pci_ioport *p,
@@ -71,6 +76,11 @@ int pci_vfio_read_config(const struct rte_pci_device *dev,
 int pci_vfio_write_config(const struct rte_pci_device *dev,
  const void *buf, si

[RFC 4/4] bus/pci: add VFIO sparse mmap support

2023-04-17 Thread Chenbo Xia
This patch adds sparse mmap support in PCI bus. Sparse mmap is a
capability defined in VFIO which allows multiple mmap areas in one
VFIO region.

Signed-off-by: Chenbo Xia 
---
 drivers/baseband/acc/rte_acc100_pmd.c |   6 +-
 drivers/baseband/acc/rte_vrb_pmd.c|   6 +-
 .../fpga_5gnr_fec/rte_fpga_5gnr_fec.c |   6 +-
 drivers/baseband/fpga_lte_fec/fpga_lte_fec.c  |   6 +-
 drivers/bus/pci/bsd/pci.c |  20 +-
 drivers/bus/pci/bus_pci_driver.h  |  24 +-
 drivers/bus/pci/linux/pci.c   |  13 +-
 drivers/bus/pci/linux/pci_uio.c   |  24 +-
 drivers/bus/pci/linux/pci_vfio.c  | 214 +++---
 drivers/bus/pci/pci_common.c  |  45 ++--
 drivers/bus/pci/pci_common_uio.c  |  12 +-
 drivers/bus/pci/private.h |   2 +
 drivers/common/cnxk/roc_dev.c |   4 +-
 drivers/common/cnxk/roc_dpi.c |   2 +-
 drivers/common/cnxk/roc_ml.c  |  22 +-
 drivers/common/qat/dev/qat_dev_gen1.c |   2 +-
 drivers/common/qat/dev/qat_dev_gen4.c |   4 +-
 drivers/common/sfc_efx/sfc_efx.c  |   2 +-
 drivers/compress/octeontx/otx_zip.c   |   4 +-
 drivers/crypto/ccp/ccp_dev.c  |   4 +-
 drivers/crypto/cnxk/cnxk_cryptodev_ops.c  |   2 +-
 drivers/crypto/nitrox/nitrox_device.c |   4 +-
 drivers/crypto/octeontx/otx_cryptodev_ops.c   |   6 +-
 drivers/crypto/virtio/virtio_pci.c|   6 +-
 drivers/dma/cnxk/cnxk_dmadev.c|   2 +-
 drivers/dma/hisilicon/hisi_dmadev.c   |   6 +-
 drivers/dma/idxd/idxd_pci.c   |   4 +-
 drivers/dma/ioat/ioat_dmadev.c|   2 +-
 drivers/event/dlb2/pf/dlb2_main.c |  16 +-
 drivers/event/octeontx/ssovf_probe.c  |  38 ++--
 drivers/event/octeontx/timvf_probe.c  |  18 +-
 drivers/event/skeleton/skeleton_eventdev.c|   2 +-
 drivers/mempool/octeontx/octeontx_fpavf.c |   6 +-
 drivers/net/ark/ark_ethdev.c  |   4 +-
 drivers/net/atlantic/atl_ethdev.c |   2 +-
 drivers/net/avp/avp_ethdev.c  |  20 +-
 drivers/net/axgbe/axgbe_ethdev.c  |   4 +-
 drivers/net/bnx2x/bnx2x_ethdev.c  |   6 +-
 drivers/net/bnxt/bnxt_ethdev.c|   8 +-
 drivers/net/cpfl/cpfl_ethdev.c|   4 +-
 drivers/net/cxgbe/cxgbe_ethdev.c  |   2 +-
 drivers/net/cxgbe/cxgbe_main.c|   2 +-
 drivers/net/cxgbe/cxgbevf_ethdev.c|   2 +-
 drivers/net/cxgbe/cxgbevf_main.c  |   2 +-
 drivers/net/e1000/em_ethdev.c |   4 +-
 drivers/net/e1000/igb_ethdev.c|   4 +-
 drivers/net/ena/ena_ethdev.c  |   4 +-
 drivers/net/enetc/enetc_ethdev.c  |   2 +-
 drivers/net/enic/enic_main.c  |   4 +-
 drivers/net/fm10k/fm10k_ethdev.c  |   2 +-
 drivers/net/gve/gve_ethdev.c  |   4 +-
 drivers/net/hinic/base/hinic_pmd_hwif.c   |  14 +-
 drivers/net/hns3/hns3_ethdev.c|   2 +-
 drivers/net/hns3/hns3_ethdev_vf.c |   2 +-
 drivers/net/hns3/hns3_rxtx.c  |   4 +-
 drivers/net/i40e/i40e_ethdev.c|   2 +-
 drivers/net/iavf/iavf_ethdev.c|   2 +-
 drivers/net/ice/ice_dcf.c |   2 +-
 drivers/net/ice/ice_ethdev.c  |   2 +-
 drivers/net/idpf/idpf_ethdev.c|   4 +-
 drivers/net/igc/igc_ethdev.c  |   2 +-
 drivers/net/ionic/ionic_dev_pci.c |   2 +-
 drivers/net/ixgbe/ixgbe_ethdev.c  |   4 +-
 drivers/net/liquidio/lio_ethdev.c |   4 +-
 drivers/net/nfp/nfp_ethdev.c  |   2 +-
 drivers/net/nfp/nfp_ethdev_vf.c   |   6 +-
 drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c|   4 +-
 drivers/net/ngbe/ngbe_ethdev.c|   2 +-
 drivers/net/octeon_ep/otx_ep_ethdev.c |   2 +-
 drivers/net/octeontx/base/octeontx_pkivf.c|   6 +-
 drivers/net/octeontx/base/octeontx_pkovf.c|  12 +-
 drivers/net/qede/qede_main.c  |   6 +-
 drivers/net/sfc/sfc.c |   2 +-
 drivers/net/thunderx/nicvf_ethdev.c   |   2 +-
 drivers/net/txgbe/txgbe_ethdev.c  |   2 +-
 drivers/net/txgbe/txgbe_ethdev_vf.c   |   2 +-
 drivers/net/virtio/virtio_pci.c   |   6 +-
 drivers/net/vmxnet3/vmxnet3_ethdev.c  |   4 +-
 drivers/raw/cnxk_bphy/cnxk_bphy.c |  10 +-
 drivers/raw/cnxk_bphy/cnxk_bphy_cgx.c |   6 +-
 drivers/raw/ifpga/afu_pmd_n3000.c |   4 +-
 drivers/raw/ifpga/ifpga_rawdev.c  |   6 +-
 drivers/raw/ntb/ntb_hw_intel.c|   8 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c |   6 +-
 drivers/vdpa/sfc/sfc_vdpa_hw.c|   2 +-
 drivers/vdpa/sfc/sfc_vdpa_ops.c

[PATCH] bus/pci: fix missing MMIO APIs in Windows

2023-06-08 Thread Chenbo Xia
MMIO read and write APIs were defined in the PCI bus, but the corresponding
implementations were missing on Windows. This patch adds them.

Bugzilla ID: 1245
Fixes: 095cf6e68b28 ("bus/pci: introduce MMIO read/write")
Cc: sta...@dpdk.org

Signed-off-by: Chenbo Xia 
---
 drivers/bus/pci/windows/pci.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/bus/pci/windows/pci.c b/drivers/bus/pci/windows/pci.c
index df5221d913..45a12bcb52 100644
--- a/drivers/bus/pci/windows/pci.c
+++ b/drivers/bus/pci/windows/pci.c
@@ -88,6 +88,30 @@ rte_pci_write_config(const struct rte_pci_device *dev 
__rte_unused,
return 0;
 }
 
+/* Read PCI MMIO space. */
+int
+rte_pci_mmio_read(const struct rte_pci_device *dev, int bar,
+ void *buf, size_t len, off_t offset)
+{
+   if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL ||
+   (uint64_t)offset + len > dev->mem_resource[bar].len)
+   return -1;
+   memcpy(buf, (uint8_t *)dev->mem_resource[bar].addr + offset, len);
+   return len;
+}
+
+/* Write PCI MMIO space. */
+int
+rte_pci_mmio_write(const struct rte_pci_device *dev, int bar,
+  const void *buf, size_t len, off_t offset)
+{
+   if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL ||
+   (uint64_t)offset + len > dev->mem_resource[bar].len)
+   return -1;
+   memcpy((uint8_t *)dev->mem_resource[bar].addr + offset, buf, len);
+   return len;
+}
+
 enum rte_iova_mode
 pci_device_iova_mode(const struct rte_pci_driver *pdrv __rte_unused,
const struct rte_pci_device *pdev __rte_unused)
-- 
2.17.1



[PATCH] maintainers: update for PCI bus driver and library

2023-06-13 Thread Chenbo Xia
Add myself as maintainer of PCI bus driver and co-maintainer of PCI
library.

Signed-off-by: Chenbo Xia 
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 906b31f97c..fea84b8cb9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -586,6 +586,7 @@ F: drivers/bus/dpaa/
 F: drivers/bus/fslmc/
 
 PCI bus driver
+M: Chenbo Xia 
 F: drivers/bus/pci/
 
 Platform bus driver
@@ -1667,6 +1668,7 @@ F: app/test/test_rcu*
 F: doc/guides/prog_guide/rcu_lib.rst
 
 PCI
+M: Chenbo Xia 
 M: Gaetan Rivet 
 F: lib/pci/
 
-- 
2.17.1



[dpdk-dev] [PATCH] vhost: promote some APIs to stable

2021-09-06 Thread Chenbo Xia
As reported by symbol bot, APIs listed in this patch have been
experimental for more than two years. This patch promotes these
18 APIs to stable.

Signed-off-by: Chenbo Xia 
---
 lib/vhost/rte_vhost.h| 13 -
 lib/vhost/rte_vhost_crypto.h |  5 -
 lib/vhost/version.map| 36 ++--
 3 files changed, 18 insertions(+), 36 deletions(-)

diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index 8d875e9322..fd372d5259 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -342,7 +342,6 @@ rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t 
gpa)
  * @return
  *  the host virtual address on success, 0 on failure
  */
-__rte_experimental
 static __rte_always_inline uint64_t
 rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
   uint64_t gpa, uint64_t *len)
@@ -522,7 +521,6 @@ int rte_vhost_driver_get_features(const char *path, 
uint64_t *features);
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_driver_set_protocol_features(const char *path,
uint64_t protocol_features);
@@ -537,7 +535,6 @@ rte_vhost_driver_set_protocol_features(const char *path,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_driver_get_protocol_features(const char *path,
uint64_t *protocol_features);
@@ -552,7 +549,6 @@ rte_vhost_driver_get_protocol_features(const char *path,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num);
 
@@ -768,7 +764,6 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
struct rte_vhost_ring_inflight *vring);
@@ -788,7 +783,6 @@ rte_vhost_get_vhost_ring_inflight(int vid, uint16_t 
vring_idx,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
uint16_t idx);
@@ -811,7 +805,6 @@ rte_vhost_set_inflight_desc_split(int vid, uint16_t 
vring_idx,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
uint16_t head, uint16_t last, uint16_t *inflight_entry);
@@ -828,7 +821,6 @@ rte_vhost_set_inflight_desc_packed(int vid, uint16_t 
vring_idx,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_set_last_inflight_io_split(int vid,
uint16_t vring_idx, uint16_t idx);
@@ -848,7 +840,6 @@ rte_vhost_set_last_inflight_io_split(int vid,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_set_last_inflight_io_packed(int vid,
uint16_t vring_idx, uint16_t head);
@@ -867,7 +858,6 @@ rte_vhost_set_last_inflight_io_packed(int vid,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
uint16_t last_used_idx, uint16_t idx);
@@ -884,7 +874,6 @@ rte_vhost_clr_inflight_desc_split(int vid, uint16_t 
vring_idx,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
uint16_t head);
@@ -965,7 +954,6 @@ rte_vhost_get_vring_base(int vid, uint16_t queue_id,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_get_vring_base_from_inflight(int vid,
uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx);
@@ -1000,7 +988,6 @@ rte_vhost_set_vring_base(int vid, uint16_t queue_id,
  * @return
  *  0 on success, -1 on failure
  */
-__rte_experimental
 int
 rte_vhost_extern_callback_register(int vid,
struct rte_vhost_user_extern_ops const * const ops, void *ctx);
diff --git a/lib/vhost/rte_vhost_crypto.h b/lib/vhost/rte_vhost_crypto.h
index 8531757285..f54d731139 100644
--- a/lib/vhost/rte_vhost_crypto.h
+++ b/lib/vhost/rte_vhost_crypto.h
@@ -58,7 +58,6 @@ rte_vhost_crypto_driver_start(const char *path);
  *  0 if the Vhost Crypto Instance is created successfully.
  *  Negative integer if otherwise
  */
-__rte_experimental
 int
 rte_vhost_crypto_create(int vid, uint8_t cryptodev_id,
struct rte_mempool *sess_pool,
@@ -74,7 +73,6 @@ rte_vhost_crypto_create(int vid, uint8_t cryptodev_id,
  *  0 if the Vhost Crypto Instance is created successfully.
  *  Negative integer if otherwise.
  */
-__rte_experimental
 int
 rte_vhost_crypto_free(int vid);
 
@@ -89,7 +87,6 @@ rte_vhost_crypto_free(int vid);
  *  0 if completed successfully.
  *  Negative integer if otherwise.
  */
-__rte_experimental
 int
 rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy 
option);
 
@@ -110,7 +107,6 @@ rte_vhost_crypto_set_zero_copy(int vid, enum

[dpdk-dev] [PATCH] vhost: fix wrong IOTLB initialization

2021-05-13 Thread Chenbo Xia
This patch fixes an application crash caused by the vhost IOTLB not
being initialized when virtio has multiqueue enabled.

IOTLB messages can be sent while some queues are not yet enabled. If we
initialize the IOTLB in vhost_user_set_vring_num, an IOTLB update may
arrive while the IOTLB pools of the disabled queues are still
uninitialized.

Fixes: 968bbc7e2e50 ("vhost: avoid IOTLB mempool allocation while IOMMU 
disabled")

Signed-off-by: Chenbo Xia 
---
 lib/vhost/vhost_user.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 611ff209e3..ae4df8eb69 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -311,6 +311,7 @@ vhost_user_set_features(struct virtio_net **pdev, struct 
VhostUserMsg *msg,
uint64_t features = msg->payload.u64;
uint64_t vhost_features = 0;
struct rte_vdpa_device *vdpa_dev;
+   uint32_t i;
 
if (validate_msg_fds(msg, 0) != 0)
return RTE_VHOST_MSG_RESULT_ERR;
@@ -389,6 +390,14 @@ vhost_user_set_features(struct virtio_net **pdev, struct 
VhostUserMsg *msg,
vdpa_dev->ops->set_features(dev->vid);
 
dev->flags &= ~VIRTIO_DEV_FEATURES_FAILED;
+
+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
+   for (i = 0; i < dev->nr_vring; i++) {
+   if (vhost_user_iotlb_init(dev, i))
+   return RTE_VHOST_MSG_RESULT_ERR;
+   }
+   }
+
return RTE_VHOST_MSG_RESULT_OK;
 }
 
@@ -469,10 +478,6 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
return RTE_VHOST_MSG_RESULT_ERR;
}
 
-   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
-   if (vhost_user_iotlb_init(dev, msg->payload.state.index))
-   return RTE_VHOST_MSG_RESULT_ERR;
-   }
return RTE_VHOST_MSG_RESULT_OK;
 }
 
-- 
2.17.1



[dpdk-dev] [RFC v3 0/6] Add mdev (Mediated device) support in DPDK

2021-05-31 Thread Chenbo Xia
Hi everyone,

This is a draft implementation of the mdev (Mediated device [1])
support in DPDK PCI bus driver. Mdev is a way to virtualize devices
in Linux kernel. Based on the device-api (mdev_type/device_api),
there could be different types of mdev devices (e.g. vfio-pci).
In this patchset, the PCI bus driver is extended to support scanning
and probing the mdev devices whose device-api is "vfio-pci".

                     +---------+
                     | PCI bus |
                     +----+----+
                          |
         +--------+-------+-------+--------+
         |        |               |        |
  Physical PCI devices ...   Mediated PCI devices ...

The first four patches in this patchset are mainly preparation for mdev
bus support. The remaining two patches are the key implementation of mdev bus.

The implementation of mdev bus in DPDK has several options:

1: Embed mdev bus in current pci bus

   This patchset takes this option as an example. Mdev has several
   device types: pci/platform/amba/ccw/ap. DPDK currently only cares
   about the pci devices among all mdev device types, so we could embed
   the mdev bus into the current pci bus. The pci bus with mdev support
   will then scan/plug/unplug/... not only normal pci devices but also
   mediated pci devices.

2: A new mdev bus that scans mediated pci devices and probes mdev driver to
   plug-in pci devices to pci bus

   If we took this option, a new mdev bus will be implemented to scan
   mediated pci devices and a new mdev driver for pci devices will be
   implemented in pci bus to plug-in mediated pci devices to pci bus.

   Our RFC v1 takes this option:
   
http://patchwork.dpdk.org/project/dpdk/cover/20190403071844.21126-1-tiwei@intel.com/

   Note that for either option 1 or 2, device drivers are not aware of
   the implementation difference and only use the structs/functions
   exposed by the pci bus. Mediated pci devices differ from normal pci
   devices in two ways:
   1. Mediated pci devices use a UUID as address, whereas normal ones
      use a BDF.
   2. Mediated pci devices may have capabilities that normal pci devices
      do not have. For example, mediated pci devices could have regions
      with the sparse mmap capability, which allows a region to have
      multiple mmap areas. Another example is that mediated pci devices
      may have regions (or parts of regions) that are not mmaped but
      still need to be accessed.
   These differences will change the current ABI (i.e., struct
   rte_pci_device). Please check the 5th and 6th patches for details.

3: A brand new mdev bus that does everything

   This option will implement a new, standalone mdev bus. It does not
   need any changes in the current pci bus, only some shared code (the
   linux vfio part) from the pci bus. Drivers of devices that support mdev
   will register themselves as mdev drivers and no longer rely on the pci
   bus. This option, IMHO, will make the code clean. The only potential
   problem may be code duplication, which could be solved by making the
   linux vfio code of the pci bus common and shared.

Your comments on the above three options are welcome and appreciated!

Thanks!
Chenbo


RFC v3:
- Add sparse mmap support
- Minor fixes and improvements

RFC v2:
- Let PCI bus scan mediated PCI devices directly
- Address Keith's comments
- Merge below patch into this series (David)
   http://patches.dpdk.org/patch/55927/
- Add internal representation of PCI device (David)
- Minor fixes and improvements

[1] 
https://github.com/torvalds/linux/blob/master/Documentation/driver-api/vfio-mediated-device.rst

Chenbo Xia (1):
  bus/pci: add sparse mmap support for mediated PCI devices

Tiwei Bie (5):
  bus/pci: introduce an internal representation of PCI device
  bus/pci: avoid depending on private value in kernel source
  bus/pci: introduce helper for MMIO read and write
  eal: add a helper for reading string from sysfs
  bus/pci: add mdev support

 drivers/bus/pci/bsd/pci.c |  36 +-
 drivers/bus/pci/linux/pci.c   | 107 -
 drivers/bus/pci/linux/pci_init.h  |  29 +-
 drivers/bus/pci/linux/pci_uio.c   |  22 +
 drivers/bus/pci/linux/pci_vfio.c  | 586 ++
 drivers/bus/pci/linux/pci_vfio_mdev.c | 277 
 drivers/bus/pci/meson.build   |   1 +
 drivers/bus/pci/pci_common.c  |  86 ++--
 drivers/bus/pci/pci_params.c  |  36 +-
 drivers/bus/pci/private.h |  40 ++
 drivers/bus/pci/rte_bus_pci.h |  83 +++-
 drivers/bus/pci/version.map   |   4 +
 lib/eal/common/eal_filesystem.h   |  10 +
 lib/eal/freebsd/eal.c |  22 +
 lib/eal/linux/eal.c   |  39 +-
 lib/eal/version.map   |   3 +
 16 files changed, 1224 insertions(+), 157 deletions(-)
 create mode 100644 drivers/bus/pci/linux/pci_vfio_mdev.c

-- 
2.17.1



[dpdk-dev] [RFC v3 1/6] bus/pci: introduce an internal representation of PCI device

2021-05-31 Thread Chenbo Xia
From: Tiwei Bie 

This patch introduces an internal representation of the PCI device
which will be used to store the internal information that doesn't have
to be exposed, e.g. the VFIO region sizes/offsets.

In this patch, the internal structure is simply a wrapper of the
rte_pci_device structure. More fields will be added in the coming
patches.

Suggested-by: David Marchand 
Signed-off-by: Tiwei Bie 
Signed-off-by: Chenbo Xia 
---
 drivers/bus/pci/bsd/pci.c| 14 +-
 drivers/bus/pci/linux/pci.c  | 27 ---
 drivers/bus/pci/pci_common.c |  2 +-
 drivers/bus/pci/private.h| 12 
 4 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c
index 4b8a208781..20ce979f60 100644
--- a/drivers/bus/pci/bsd/pci.c
+++ b/drivers/bus/pci/bsd/pci.c
@@ -212,16 +212,20 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, 
int res_idx,
 static int
 pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
 {
+   struct rte_pci_device_internal *pdev;
struct rte_pci_device *dev;
struct pci_bar_io bar;
unsigned i, max;
 
-   dev = malloc(sizeof(*dev));
-   if (dev == NULL) {
+   pdev = malloc(sizeof(*pdev));
+   if (pdev == NULL) {
+   RTE_LOG(ERR, EAL, "Cannot allocate memory for internal pci 
device\n");
return -1;
}
 
-   memset(dev, 0, sizeof(*dev));
+   memset(pdev, 0, sizeof(*pdev));
+
+   dev = &pdev->device;
dev->device.bus = &rte_pci_bus.bus;
 
dev->addr.domain = conf->pc_sel.pc_domain;
@@ -307,7 +311,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
memmove(dev2->mem_resource,
dev->mem_resource,
sizeof(dev->mem_resource));
-   free(dev);
+   free(pdev);
}
return 0;
}
@@ -317,7 +321,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
return 0;
 
 skipdev:
-   free(dev);
+   free(pdev);
return 0;
 }
 
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 0dc99e9cb2..6dbba10657 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -218,22 +218,27 @@ pci_scan_one(const char *dirname, const struct 
rte_pci_addr *addr)
 {
char filename[PATH_MAX];
unsigned long tmp;
+   struct rte_pci_device_internal *pdev;
struct rte_pci_device *dev;
char driver[PATH_MAX];
int ret;
 
-   dev = malloc(sizeof(*dev));
-   if (dev == NULL)
+   pdev = malloc(sizeof(*pdev));
+   if (pdev == NULL) {
+   RTE_LOG(ERR, EAL, "Cannot allocate memory for internal pci 
device\n");
return -1;
+   }
+
+   memset(pdev, 0, sizeof(*pdev));
 
-   memset(dev, 0, sizeof(*dev));
+   dev = &pdev->device;
dev->device.bus = &rte_pci_bus.bus;
dev->addr = *addr;
 
/* get vendor id */
snprintf(filename, sizeof(filename), "%s/vendor", dirname);
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-   free(dev);
+   free(pdev);
return -1;
}
dev->id.vendor_id = (uint16_t)tmp;
@@ -241,7 +246,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr 
*addr)
/* get device id */
snprintf(filename, sizeof(filename), "%s/device", dirname);
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-   free(dev);
+   free(pdev);
return -1;
}
dev->id.device_id = (uint16_t)tmp;
@@ -250,7 +255,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr 
*addr)
snprintf(filename, sizeof(filename), "%s/subsystem_vendor",
 dirname);
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-   free(dev);
+   free(pdev);
return -1;
}
dev->id.subsystem_vendor_id = (uint16_t)tmp;
@@ -259,7 +264,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr 
*addr)
snprintf(filename, sizeof(filename), "%s/subsystem_device",
 dirname);
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-   free(dev);
+   free(pdev);
return -1;
}
dev->id.subsystem_device_id = (uint16_t)tmp;
@@ -268,7 +273,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr 
*addr)
snprintf(filename, sizeof(filename), "%s/class",
 dirname);
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-   free(dev);
+   free(pdev);
  

[dpdk-dev] [RFC v3 2/6] bus/pci: avoid depending on private value in kernel source

2021-05-31 Thread Chenbo Xia
From: Tiwei Bie 

The value 40 used in VFIO_GET_REGION_ADDR() is a private value
(VFIO_PCI_OFFSET_SHIFT) defined in Linux kernel source [1]. It
is not part of VFIO API, and we should not depend on it.

[1] 
https://github.com/torvalds/linux/blob/v5.12/drivers/vfio/pci/vfio_pci_private.h

Signed-off-by: Tiwei Bie 
---
 drivers/bus/pci/linux/pci.c  |   4 +-
 drivers/bus/pci/linux/pci_init.h |   4 +-
 drivers/bus/pci/linux/pci_vfio.c | 176 ---
 drivers/bus/pci/private.h|   9 ++
 4 files changed, 153 insertions(+), 40 deletions(-)

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 6dbba10657..8f1fddbf20 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -647,7 +647,7 @@ int rte_pci_read_config(const struct rte_pci_device *device,
return pci_uio_read_config(intr_handle, buf, len, offset);
 #ifdef VFIO_PRESENT
case RTE_PCI_KDRV_VFIO:
-   return pci_vfio_read_config(intr_handle, buf, len, offset);
+   return pci_vfio_read_config(device, buf, len, offset);
 #endif
default:
rte_pci_device_name(&device->addr, devname,
@@ -671,7 +671,7 @@ int rte_pci_write_config(const struct rte_pci_device 
*device,
return pci_uio_write_config(intr_handle, buf, len, offset);
 #ifdef VFIO_PRESENT
case RTE_PCI_KDRV_VFIO:
-   return pci_vfio_write_config(intr_handle, buf, len, offset);
+   return pci_vfio_write_config(device, buf, len, offset);
 #endif
default:
rte_pci_device_name(&device->addr, devname,
diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index dcea726186..9f6659ba6e 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -66,9 +66,9 @@ int pci_uio_ioport_unmap(struct rte_pci_ioport *p);
 #endif
 
 /* access config space */
-int pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+int pci_vfio_read_config(const struct rte_pci_device *dev,
 void *buf, size_t len, off_t offs);
-int pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+int pci_vfio_write_config(const struct rte_pci_device *dev,
  const void *buf, size_t len, off_t offs);
 
 int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 07706f7338..012e7f72c1 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -43,35 +43,82 @@ static struct rte_tailq_elem rte_vfio_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_vfio_tailq)
 
+static int
+pci_vfio_get_region(const struct rte_pci_device *dev, int index,
+   uint64_t *size, uint64_t *offset)
+{
+   const struct rte_pci_device_internal *pdev =
+   RTE_PCI_DEVICE_INTERNAL_CONST(dev);
+
+   if (index >= VFIO_PCI_NUM_REGIONS || index >= RTE_MAX_PCI_REGIONS)
+   return -1;
+
+   if (pdev->region[index].size == 0 && pdev->region[index].offset == 0)
+   return -1;
+
+   *size   = pdev->region[index].size;
+   *offset = pdev->region[index].offset;
+
+   return 0;
+}
+
 int
-pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+pci_vfio_read_config(const struct rte_pci_device *dev,
void *buf, size_t len, off_t offs)
 {
-   return pread64(intr_handle->vfio_dev_fd, buf, len,
-  VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+   uint64_t size, offset;
+   int fd;
+
+   fd = dev->intr_handle.vfio_dev_fd;
+
+   if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+   &size, &offset) != 0)
+   return -1;
+
+   if ((uint64_t)len + offs > size)
+   return -1;
+
+   return pread64(fd, buf, len, offset + offs);
 }
 
 int
-pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+pci_vfio_write_config(const struct rte_pci_device *dev,
const void *buf, size_t len, off_t offs)
 {
-   return pwrite64(intr_handle->vfio_dev_fd, buf, len,
-  VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+   uint64_t size, offset;
+   int fd;
+
+   fd = dev->intr_handle.vfio_dev_fd;
+
+   if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+   &size, &offset) != 0)
+   return -1;
+
+   if ((uint64_t)len + offs > size)
+   return -1;
+
+   return pwrite64(fd, buf, len, offset + offs);
 }
 
 /* get PCI BAR number where MSI-X interrupts are */
 static int
-pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
+pci_vfio_get_msix_bar(const struct rte_pci_device *dev, int fd,
+   struct pci_msix_table *msix_table)
 {
int ret;
uint32_t reg;
uint16_t flags;
uint8_t cap_id, cap_offs

[dpdk-dev] [RFC v3 3/6] bus/pci: introduce helper for MMIO read and write

2021-05-31 Thread Chenbo Xia
From: Tiwei Bie 

The MMIO regions may not be mmap-able for mediated PCI device.
In this case, the application should explicitly do read and write
to access these regions.

Signed-off-by: Tiwei Bie 
---
 drivers/bus/pci/bsd/pci.c| 22 +++
 drivers/bus/pci/linux/pci.c  | 46 ++
 drivers/bus/pci/linux/pci_init.h | 10 +++
 drivers/bus/pci/linux/pci_uio.c  | 22 +++
 drivers/bus/pci/linux/pci_vfio.c | 36 
 drivers/bus/pci/rte_bus_pci.h| 48 
 drivers/bus/pci/version.map  |  4 +++
 7 files changed, 188 insertions(+)

diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c
index 20ce979f60..781f65c637 100644
--- a/drivers/bus/pci/bsd/pci.c
+++ b/drivers/bus/pci/bsd/pci.c
@@ -494,6 +494,28 @@ int rte_pci_write_config(const struct rte_pci_device *dev,
return -1;
 }
 
+/* Read PCI MMIO space. */
+int rte_pci_mmio_read(const struct rte_pci_device *dev, int bar,
+ void *buf, size_t len, off_t offset)
+{
+   if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL ||
+   (uint64_t)offset + len > dev->mem_resource[bar].len)
+   return -1;
+   memcpy(buf, (uint8_t *)dev->mem_resource[bar].addr + offset, len);
+   return len;
+}
+
+/* Write PCI MMIO space. */
+int rte_pci_mmio_write(const struct rte_pci_device *dev, int bar,
+  const void *buf, size_t len, off_t offset)
+{
+   if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL ||
+   (uint64_t)offset + len > dev->mem_resource[bar].len)
+   return -1;
+   memcpy((uint8_t *)dev->mem_resource[bar].addr + offset, buf, len);
+   return len;
+}
+
 int
 rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
struct rte_pci_ioport *p)
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 8f1fddbf20..4805f277c5 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -682,6 +682,52 @@ int rte_pci_write_config(const struct rte_pci_device 
*device,
}
 }
 
+/* Read PCI MMIO space. */
+int rte_pci_mmio_read(const struct rte_pci_device *device, int bar,
+   void *buf, size_t len, off_t offset)
+{
+   char devname[RTE_DEV_NAME_MAX_LEN] = "";
+
+   switch (device->kdrv) {
+   case RTE_PCI_KDRV_IGB_UIO:
+   case RTE_PCI_KDRV_UIO_GENERIC:
+   return pci_uio_mmio_read(device, bar, buf, len, offset);
+#ifdef VFIO_PRESENT
+   case RTE_PCI_KDRV_VFIO:
+   return pci_vfio_mmio_read(device, bar, buf, len, offset);
+#endif
+   default:
+   rte_pci_device_name(&device->addr, devname,
+   RTE_DEV_NAME_MAX_LEN);
+   RTE_LOG(ERR, EAL,
+   "Unknown driver type for %s\n", devname);
+   return -1;
+   }
+}
+
+/* Write PCI MMIO space. */
+int rte_pci_mmio_write(const struct rte_pci_device *device, int bar,
+   const void *buf, size_t len, off_t offset)
+{
+   char devname[RTE_DEV_NAME_MAX_LEN] = "";
+
+   switch (device->kdrv) {
+   case RTE_PCI_KDRV_IGB_UIO:
+   case RTE_PCI_KDRV_UIO_GENERIC:
+   return pci_uio_mmio_write(device, bar, buf, len, offset);
+#ifdef VFIO_PRESENT
+   case RTE_PCI_KDRV_VFIO:
+   return pci_vfio_mmio_write(device, bar, buf, len, offset);
+#endif
+   default:
+   rte_pci_device_name(&device->addr, devname,
+   RTE_DEV_NAME_MAX_LEN);
+   RTE_LOG(ERR, EAL,
+   "Unknown driver type for %s\n", devname);
+   return -1;
+   }
+}
+
 int
 rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
struct rte_pci_ioport *p)
diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index 9f6659ba6e..6853fa88a3 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -37,6 +37,11 @@ int pci_uio_read_config(const struct rte_intr_handle 
*intr_handle,
 int pci_uio_write_config(const struct rte_intr_handle *intr_handle,
 const void *buf, size_t len, off_t offs);
 
+int pci_uio_mmio_read(const struct rte_pci_device *dev, int bar,
+ void *buf, size_t len, off_t offset);
+int pci_uio_mmio_write(const struct rte_pci_device *dev, int bar,
+  const void *buf, size_t len, off_t offset);
+
 int pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
   struct rte_pci_ioport *p);
 void pci_uio_ioport_read(struct rte_pci_ioport *p,
@@ -71,6 +76,11 @@ int pci_vfio_read_config(const struct rte_pci_device *dev,
 int pci_vfio_write_config(const struct rte_pci_device *dev,
  const void *buf, size_t len, off_t offs);
 
+int pci_vfio_mmio_read(const struct rte_pci_device *dev, i

[dpdk-dev] [RFC v3 4/6] eal: add a helper for reading string from sysfs

2021-05-31 Thread Chenbo Xia
From: Tiwei Bie 

This patch adds a helper for reading string from sysfs.

Signed-off-by: Cunming Liang 
Signed-off-by: Tiwei Bie 
---
 lib/eal/common/eal_filesystem.h | 10 ++
 lib/eal/freebsd/eal.c   | 22 ++
 lib/eal/linux/eal.c | 22 ++
 lib/eal/version.map |  3 +++
 4 files changed, 57 insertions(+)

diff --git a/lib/eal/common/eal_filesystem.h b/lib/eal/common/eal_filesystem.h
index 5d21f07c20..be4c51ebb2 100644
--- a/lib/eal/common/eal_filesystem.h
+++ b/lib/eal/common/eal_filesystem.h
@@ -104,4 +104,14 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const 
char *hugedir, int f_id
  * Used to read information from files on /sys */
 int eal_parse_sysfs_value(const char *filename, unsigned long *val);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Function to read a line from a file on the filesystem.
+ * Used to read information from files on /sys
+ */
+__rte_experimental
+int rte_eal_parse_sysfs_str(const char *filename, char *buf, unsigned long sz);
+
 #endif /* EAL_FILESYSTEM_H */
diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
index f4d1676754..002f07f4da 100644
--- a/lib/eal/freebsd/eal.c
+++ b/lib/eal/freebsd/eal.c
@@ -169,6 +169,28 @@ eal_parse_sysfs_value(const char *filename, unsigned long 
*val)
return 0;
 }
 
+int
+rte_eal_parse_sysfs_str(const char *filename, char *buf, unsigned long sz)
+{
+   FILE *f;
+
+   f = fopen(filename, "r");
+   if (f == NULL) {
+   RTE_LOG(ERR, EAL, "%s(): cannot open sysfs file %s\n",
+   __func__, filename);
+   return -1;
+   }
+
+   if (fgets(buf, sz, f) == NULL) {
+   RTE_LOG(ERR, EAL, "%s(): cannot read sysfs file %s\n",
+   __func__, filename);
+   fclose(f);
+   return -1;
+   }
+
+   fclose(f);
+   return 0;
+}
 
 /* create memory configuration in shared/mmap memory. Take out
  * a write lock on the memsegs, so we can auto-detect primary/secondary.
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index ba19fc6347..d5917a48ca 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -260,6 +260,28 @@ eal_parse_sysfs_value(const char *filename, unsigned long 
*val)
return 0;
 }
 
+int
+rte_eal_parse_sysfs_str(const char *filename, char *buf, unsigned long sz)
+{
+   FILE *f;
+
+   f = fopen(filename, "r");
+   if (f == NULL) {
+   RTE_LOG(ERR, EAL, "%s(): cannot open sysfs file %s\n",
+   __func__, filename);
+   return -1;
+   }
+
+   if (fgets(buf, sz, f) == NULL) {
+   RTE_LOG(ERR, EAL, "%s(): cannot read sysfs file %s\n",
+   __func__, filename);
+   fclose(f);
+   return -1;
+   }
+
+   fclose(f);
+   return 0;
+}
 
 /* create memory configuration in shared/mmap memory. Take out
  * a write lock on the memsegs, so we can auto-detect primary/secondary.
diff --git a/lib/eal/version.map b/lib/eal/version.map
index fe5c3dac98..3d7fce26a4 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -423,6 +423,9 @@ EXPERIMENTAL {
rte_version_release; # WINDOWS_NO_EXPORT
rte_version_suffix; # WINDOWS_NO_EXPORT
rte_version_year; # WINDOWS_NO_EXPORT
+
+   # added in 21.08
+   rte_eal_parse_sysfs_str; # WINDOWS_NO_EXPORT
 };
 
 INTERNAL {
-- 
2.17.1



[dpdk-dev] [RFC v3 5/6] bus/pci: add mdev support

2021-05-31 Thread Chenbo Xia
From: Tiwei Bie 

This patch adds the mdev (Mediated device) support in PCI bus
driver. With this patch, the PCI bus driver will be able to scan
and probe the mediated PCI devices (i.e. the Mediated devices
whose device API is "vfio-pci") in the system.

There are several differences between physical PCI devices
and mediated PCI devices:

- Mediated PCI devices have to be accessed through VFIO API;
- The regions in mediated PCI devices may not be mmap-able,
  and drivers need to call read/write function to access them
  in this case;
- Mediated PCI devices use UUID as device address;

Signed-off-by: Cunming Liang 
Signed-off-by: Tiwei Bie 
Signed-off-by: Chenbo Xia 
---
 drivers/bus/pci/linux/pci.c   |  30 ++-
 drivers/bus/pci/linux/pci_init.h  |  15 +-
 drivers/bus/pci/linux/pci_vfio.c  | 147 --
 drivers/bus/pci/linux/pci_vfio_mdev.c | 277 ++
 drivers/bus/pci/meson.build   |   1 +
 drivers/bus/pci/pci_common.c  |  84 +---
 drivers/bus/pci/pci_params.c  |  36 +++-
 drivers/bus/pci/private.h |  17 ++
 drivers/bus/pci/rte_bus_pci.h |  17 +-
 lib/eal/linux/eal.c   |  17 +-
 10 files changed, 571 insertions(+), 70 deletions(-)
 create mode 100644 drivers/bus/pci/linux/pci_vfio_mdev.c

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 4805f277c5..29dd9ba26f 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -30,7 +30,7 @@
 
 extern struct rte_pci_bus rte_pci_bus;
 
-static int
+int
 pci_get_kernel_driver_by_path(const char *filename, char *dri_name,
  size_t len)
 {
@@ -70,7 +70,7 @@ rte_pci_map_device(struct rte_pci_device *dev)
switch (dev->kdrv) {
case RTE_PCI_KDRV_VFIO:
 #ifdef VFIO_PRESENT
-   if (pci_vfio_is_enabled())
+   if (pci_vfio_is_enabled(dev))
ret = pci_vfio_map_resource(dev);
 #endif
break;
@@ -99,7 +99,7 @@ rte_pci_unmap_device(struct rte_pci_device *dev)
switch (dev->kdrv) {
case RTE_PCI_KDRV_VFIO:
 #ifdef VFIO_PRESENT
-   if (pci_vfio_is_enabled())
+   if (pci_vfio_is_enabled(dev))
pci_vfio_unmap_resource(dev);
 #endif
break;
@@ -347,6 +347,15 @@ pci_scan_one(const char *dirname, const struct 
rte_pci_addr *addr)
int ret;
 
TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) {
+   /*
+* Insert physical PCI devices before all mediated
+* PCI devices.
+*/
+   if (dev2->is_mdev) {
+   rte_pci_insert_device(dev2, dev);
+   return 0;
+   }
+
ret = rte_pci_addr_cmp(&dev->addr, &dev2->addr);
if (ret > 0)
continue;
@@ -465,8 +474,14 @@ rte_pci_scan(void)
return 0;
 
 #ifdef VFIO_PRESENT
-   if (!pci_vfio_is_enabled())
-   RTE_LOG(DEBUG, EAL, "VFIO PCI modules not loaded\n");
+   if (!rte_vfio_is_enabled("vfio_pci"))
+   RTE_LOG(DEBUG, EAL, "VFIO PCI module not loaded\n");
+
+   if (!rte_vfio_is_enabled("vfio_mdev"))
+   RTE_LOG(DEBUG, EAL, "VFIO MDEV module not loaded\n");
+
+   if (pci_scan_mdev() != 0)
+   return -1;
 #endif
 
dir = opendir(rte_pci_get_sysfs_path());
@@ -737,7 +752,7 @@ rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
switch (dev->kdrv) {
 #ifdef VFIO_PRESENT
case RTE_PCI_KDRV_VFIO:
-   if (pci_vfio_is_enabled())
+   if (pci_vfio_is_enabled(dev))
ret = pci_vfio_ioport_map(dev, bar, p);
break;
 #endif
@@ -801,8 +816,7 @@ rte_pci_ioport_unmap(struct rte_pci_ioport *p)
switch (p->dev->kdrv) {
 #ifdef VFIO_PRESENT
case RTE_PCI_KDRV_VFIO:
-   if (pci_vfio_is_enabled())
-   ret = pci_vfio_ioport_unmap(p);
+   ret = -1;
break;
 #endif
case RTE_PCI_KDRV_IGB_UIO:
diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index 6853fa88a3..0c0191b6d5 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -19,6 +19,9 @@
 extern void *pci_map_addr;
 void *pci_find_max_end_va(void);
 
+int pci_get_kernel_driver_by_path(const char *filename, char *dri_name,
+ size_t len);
+
 /* parse one line of the "resource" sysfs file (note that the 'line'
  * string is modified)
  */
@@ -93,7 +96,17 @@ int pci_vfio_ioport_unmap(struct rte_pci_ioport *p);
 int pci_vfio_map_resource(struct

[dpdk-dev] [RFC v3 6/6] bus/pci: add sparse mmap support for mediated PCI devices

2021-05-31 Thread Chenbo Xia
This patch adds sparse mmap support to the PCI bus. Sparse mmap is a
capability defined in VFIO which allows multiple mmap areas in one
VFIO region. Mediated PCI devices can use this capability to let the
mdev parent driver keep control over access to the non-mmappable parts
of regions.

Signed-off-by: Chenbo Xia 
---
 drivers/bus/pci/linux/pci_vfio.c | 229 +++
 drivers/bus/pci/private.h|   2 +
 drivers/bus/pci/rte_bus_pci.h|  18 ++-
 3 files changed, 218 insertions(+), 31 deletions(-)

diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 00ba5db03a..e68eccb63f 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -654,6 +654,82 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct 
mapped_pci_resource *vfio_res,
return 0;
 }
 
+static int
+pci_vfio_sparse_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
+   struct vfio_region_sparse_mmap_area *vfio_areas,
+   uint32_t nr_areas, int bar_index, int additional_flags,
+   int numa_node)
+{
+   struct pci_map *map = &vfio_res->maps[bar_index];
+   struct rte_mem_map_area *area;
+   struct vfio_region_sparse_mmap_area *sparse;
+   void *bar_addr;
+   uint32_t i, j;
+
+   map->nr_areas = nr_areas;
+
+   if (map->size == 0) {
+   RTE_LOG(DEBUG, EAL, "Bar size is 0, skip BAR%d\n", bar_index);
+   return 0;
+   }
+
+   if (!map->nr_areas) {
+   RTE_LOG(DEBUG, EAL, "Skip bar %d with no sparse mmap areas\n",
+   bar_index);
+   map->areas = NULL;
+   return 0;
+   }
+
+   if (map->areas == NULL) {
+   map->areas = rte_zmalloc_socket(NULL,
+   sizeof(*map->areas) * nr_areas,
+   RTE_CACHE_LINE_SIZE, numa_node);
+   if (map->areas == NULL) {
+   RTE_LOG(ERR, EAL,
+   "Cannot alloc memory for sparse map areas\n");
+   return -1;
+   }
+   }
+
+   for (i = 0; i < map->nr_areas; i++) {
+   area = &map->areas[i];
+   sparse = &vfio_areas[i];
+
+   bar_addr = mmap(map->addr, sparse->size, 0, MAP_PRIVATE |
+   MAP_ANONYMOUS | additional_flags, -1, 0);
+   if (bar_addr != MAP_FAILED) {
+   area->addr = pci_map_resource(bar_addr, vfio_dev_fd,
+   map->offset + sparse->offset, sparse->size,
+   RTE_MAP_FORCE_ADDRESS);
+   if (area->addr == NULL) {
+   munmap(bar_addr, sparse->size);
+   RTE_LOG(ERR, EAL, "Failed to map pci BAR%d\n",
+   bar_index);
+   goto err_map;
+   }
+
+   area->offset = sparse->offset;
+   area->size = sparse->size;
+   } else {
+   RTE_LOG(ERR, EAL, "Failed to create inaccessible 
mapping for BAR%d\n",
+   bar_index);
+   goto err_map;
+   }
+   }
+
+   return 0;
+
+err_map:
+   for (j = 0; j < i; j++) {
+   pci_unmap_resource(map->areas[j].addr, map->areas[j].size);
+   map->areas[j].offset = 0;
+   map->areas[j].size = 0;
+   }
+   rte_free(map->areas);
+   map->nr_areas = 0;
+   return -1;
+}
+
 /*
  * region info may contain capability headers, so we need to keep reallocating
  * the memory until we match allocated memory size with argsz.
@@ -770,6 +846,31 @@ pci_vfio_fill_regions(struct rte_pci_device *dev, int 
vfio_dev_fd,
return 0;
 }
 
+static void
+clean_up_pci_resource(struct mapped_pci_resource *vfio_res)
+{
+   struct pci_map *map;
+   uint32_t i, j;
+
+   for (i = 0; i < PCI_MAX_RESOURCE; i++) {
+   map = &vfio_res->maps[i];
+   if (map->nr_areas > 1) {
+   for (j = 0; j < map->nr_areas; j++)
+   pci_unmap_resource(map->areas[j].addr,
+   map->areas[j].size);
+   } else {
+   /*
+* We do not need to be aware of MSI-X BAR mappings.
+* Using current maps array is enough.
+*/
+   if (map->addr)
+   pci_unmap_resource(map->addr, map->size);
+   }
+   }
+
+   rte_free(map->areas);
+}
+
 static int
 pci_vfio_map_resource_prim

[dpdk-dev] [PATCH] doc: announce removal of ABIs in PCI bus driver

2021-06-01 Thread Chenbo Xia
All ABIs in PCI bus driver, which are defined in rte_bus_pci.h,
will be removed and the header will be made internal.

Signed-off-by: Chenbo Xia 
---
 doc/guides/rel_notes/deprecation.rst | 5 +
 1 file changed, 5 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 9584d6bfd7..b01f46c62e 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -147,3 +147,8 @@ Deprecation Notices
 * cmdline: ``cmdline`` structure will be made opaque to hide platform-specific
   content. On Linux and FreeBSD, supported prior to DPDK 20.11,
   original structure will be kept until DPDK 21.11.
+
+* pci: To reduce unnecessary ABIs exposed by DPDK bus driver, "rte_bus_pci.h"
+  will be made internal in 21.11 and macros/data structures/functions defined
+  in the header will not be considered as ABI anymore. This change is inspired
+  by the RFC https://patchwork.dpdk.org/project/dpdk/list/?series=17176.
-- 
2.17.1



Re: [PATCH 2/2] vhost: add reconnection support to VDUSE

2024-09-06 Thread Chenbo Xia
Hi Maxime,

> On Sep 5, 2024, at 22:26, Maxime Coquelin  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This patch enables VDUSE reconnection support making use of
> the newly introduced reconnection mechanism in Vhost
> library.
> 
> At DPDK VDUSE device creation time, there are two
> possibilities:
> 1. The Kernel VDUSE device does not exist:
>  a. A reconnection file named after the VDUSE device name
> is created in VDUSE tmpfs.
>  b. The file is truncated to 'struct vhost_reconnect_data'
> size, and mmapped.
>  c. Negotiated features, Virtio status... are saved for
> sanity checks at reconnect time.
> 2. The Kernel VDUSE device already exists:
>  a. Exit with failure if no reconnect file exists for
> this device.
>  b. Open and mmap the reconnect file.
>  c. Perform sanity check to ensure features are compatible.
>  d. Restore virtqueues' available indexes at startup time.
> 
> Then at runtime, the virtqueues' available index are logged by
> the Vhost reconnection mechanism.
> 
> At DPDK VDUSE device destruction time, there are two
> possibilities:
> 1. The Kernel VDUSE device destruction succeeded, which
>means it is no longer attached to the vDPA bus. The
>reconnection file is unmapped and then removed.
> 2. The Kernel VDUSE device destruction failed, meaning it
>is still attached to the vDPA bus. The reconnection
>file is unmapped but not removed to make possible later
>reconnection.
> 
> Signed-off-by: Maxime Coquelin 
> ---
> lib/vhost/vduse.c | 280 +++---
> 1 file changed, 241 insertions(+), 39 deletions(-)
> 
> diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
> index c66602905c..bd0e492d62 100644
> --- a/lib/vhost/vduse.c
> +++ b/lib/vhost/vduse.c
> @@ -136,7 +136,7 @@ vduse_control_queue_event(int fd, void *arg, int *remove 
> __rte_unused)
> }
> 
> static void
> -vduse_vring_setup(struct virtio_net *dev, unsigned int index)
> +vduse_vring_setup(struct virtio_net *dev, unsigned int index, bool reconnect)
> {
>struct vhost_virtqueue *vq = dev->virtqueue[index];
>struct vhost_vring_addr *ra = &vq->ring_addrs;
> @@ -152,6 +152,19 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int 
> index)
>return;
>}
> 
> +   if (reconnect) {
> +   vq->last_avail_idx = vq->reconnect_log->last_avail_idx;
> +   vq->last_used_idx = vq->reconnect_log->last_avail_idx;
> +   } else {
> +   vq->last_avail_idx = vq_info.split.avail_index;
> +   vq->last_used_idx = vq_info.split.avail_index;
> +   }
> +   vq->size = vq_info.num;
> +   vq->ready = true;
> +   vq->enabled = vq_info.ready;
> +   ra->desc_user_addr = vq_info.desc_addr;
> +   ra->avail_user_addr = vq_info.driver_addr;
> +   ra->used_user_addr = vq_info.device_addr;
>VHOST_CONFIG_LOG(dev->ifname, INFO, "VQ %u info:", index);
>VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnum: %u", vq_info.num);
>VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdesc_addr: %llx",
> @@ -162,15 +175,6 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int 
> index)
>(unsigned long long)vq_info.device_addr);
>VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", 
> vq_info.split.avail_index);
>VHOST_CONFIG_LOG(dev->ifname, INFO, "\tready: %u", vq_info.ready);
> -
> -   vq->last_avail_idx = vq_info.split.avail_index;
> -   vq->size = vq_info.num;
> -   vq->ready = true;
> -   vq->enabled = vq_info.ready;
> -   ra->desc_user_addr = vq_info.desc_addr;
> -   ra->avail_user_addr = vq_info.driver_addr;
> -   ra->used_user_addr = vq_info.device_addr;
> -
>vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>if (vq->kickfd < 0) {
>VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to init kickfd for 
> VQ %u: %s",
> @@ -267,7 +271,7 @@ vduse_vring_cleanup(struct virtio_net *dev, unsigned int 
> index)
> }
> 
> static void
> -vduse_device_start(struct virtio_net *dev)
> +vduse_device_start(struct virtio_net *dev, bool reconnect)
> {
>unsigned int i, ret;
> 
> @@ -287,6 +291,15 @@ vduse_device_start(struct virtio_net *dev)
>return;
>}
> 
> +   if (reconnect && dev->features != dev->reconnect_log->features) {
> +   VHOST_CONFIG_LOG(dev->ifname, ERR,
> +   "Mismatch between reconnect file features 
> 0x%" PRIx64 " & device features 0x%" PRIx64,

Checkpatch reports long line

> +   dev->reconnect_log->features, dev->features);
> +   return;
> +   }
> +
> +   dev->reconnect_log->features = dev->features;
> +
>VHOST_CONFIG_LOG(dev->ifname, INFO, "Negotiated Virtio features: 0x%" 
> PRIx64,
>dev->features);
> 
> @@ -300,7 +313,7 @@ vduse_device_start(struct virtio_net *dev)
>}
> 
>for (i = 0; i < dev->n

[dpdk-dev] [PATCH v1] net/virtio-user: fix return value check

2019-04-09 Thread Chenbo Xia
Fix unchecked return value for fcntl.

Coverity issue: 277210
Fixes: bd8f50a45d0f ("net/virtio-user: support server mode")
Cc: sta...@dpdk.org

Signed-off-by: Chenbo Xia 
---
 drivers/net/virtio/virtio_user/vhost_user.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_user/vhost_user.c 
b/drivers/net/virtio/virtio_user/vhost_user.c
index 827a48ad6..4b74bd2d8 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -394,7 +394,10 @@ virtio_user_start_server(struct virtio_user_dev *dev, 
struct sockaddr_un *un)
return -1;
 
flag = fcntl(fd, F_GETFL);
-   fcntl(fd, F_SETFL, flag | O_NONBLOCK);
+   if (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0) {
+   PMD_DRV_LOG(ERR, "fcntl failed, %s", strerror(errno));
+   return -1;
+   }
 
return 0;
 }
-- 
2.17.1



[dpdk-dev] [PATCH v1] app/testpmd: fix return value check

2019-04-09 Thread Chenbo Xia
Fix unchecked return value issue for rte_eth_dev_configure.

Coverity issue: 195021
Fixes: 2a977b891f99 ("app/testpmd: fix DCB configuration")
Cc: sta...@dpdk.org

Signed-off-by: Chenbo Xia 
---
 app/test-pmd/testpmd.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index aeaa74c98..a52c37229 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2960,8 +2960,9 @@ init_port_dcb_config(portid_t pid,
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
 
/* re-configure the device . */
-   rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
-
+   retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
+   if (retval < 0)
+   return retval;
rte_eth_dev_info_get(pid, &rte_port->dev_info);
 
/* If dev_info.vmdq_pool_base is greater than 0,
-- 
2.17.1



[dpdk-dev] [PATCH v1] crypto/virtio: fix return value check

2019-04-18 Thread Chenbo Xia
Fix unchecked return value issue for rte_pci_read_config.

Coverity issue: 302861
Fixes: 25500d4b8076 ("crypto/virtio: support device init")
Cc: sta...@dpdk.org

Signed-off-by: Chenbo Xia 
---
 drivers/crypto/virtio/virtio_pci.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/virtio/virtio_pci.c 
b/drivers/crypto/virtio/virtio_pci.c
index 0c0c64471..8137b3c5a 100644
--- a/drivers/crypto/virtio/virtio_pci.c
+++ b/drivers/crypto/virtio/virtio_pci.c
@@ -397,9 +397,13 @@ virtio_read_caps(struct rte_pci_device *dev, struct 
virtio_crypto_hw *hw)
hw->common_cfg = get_cfg_addr(dev, &cap);
break;
case VIRTIO_PCI_CAP_NOTIFY_CFG:
-   rte_pci_read_config(dev, &hw->notify_off_multiplier,
+   ret = rte_pci_read_config(dev, 
&hw->notify_off_multiplier,
4, pos + sizeof(cap));
-   hw->notify_base = get_cfg_addr(dev, &cap);
+   if (ret != 4)
+   VIRTIO_CRYPTO_INIT_LOG_ERR(
+   "failed to read notify_off_multiplier: 
ret %d", ret);
+   else
+   hw->notify_base = get_cfg_addr(dev, &cap);
break;
case VIRTIO_PCI_CAP_DEVICE_CFG:
hw->dev_cfg = get_cfg_addr(dev, &cap);
-- 
2.17.1



[dpdk-dev] [PATCH 0/9] Introduce vfio-user library

2020-12-17 Thread Chenbo Xia
This series enables DPDK to be an alternative I/O device emulation library for
building virtualized devices in separate processes outside QEMU. It introduces
a new library for device emulation (librte_vfio_user).

*librte_vfio_user* library is an implementation of VFIO-over-socket[1] (also
known as vfio-user) which is a protocol that allows a device to be virtualized
in a separate process outside of QEMU. 

Background & Motivation 
---
The disaggregated/multi-process QEMU is using VFIO-over-socket/vfio-user
as the main transport mechanism to disaggregate IO services from QEMU[2].
Vfio-user essentially implements the VFIO device model presented to the
user process by a set of messages over a unix-domain socket. The main
difference between application using vfio-user and application using vfio
kernel module is that device manipulation is based on socket messages for
vfio-user but system calls for vfio kernel module. The vfio-user devices
consist of a generic VFIO device type, living in QEMU, which is called the
client[3], and the core device implementation (emulated device), living
outside of QEMU, which is called the server. Once the vfio-user implementation
allows emulated devices to be removed from QEMU, other places are needed to
host virtualized/emulated devices. This series introduces vfio-user support
in DPDK so that DPDK can be one of the hosts for emulated devices, in addition
to QEMU.

This series introduces the server and client implementations of the vfio-user
protocol. The server plays the role of the emulated device and the client is
the device consumer. With this implementation, DPDK can act as both device
provider and device consumer.

Design overview
---

+--------------+     +--------------+
| +----------+ |     | +----------+ |
| | Generic  | |     | | Emulated | |
| | vfio-dev | |     | | device   | |
| +----------+ |     | +----|-----+ |
| +----------+ |     | +----|-----+ |
| | vfio-user| |     | | vfio-user| |
| | client   | |<--->| | server   | |
| +----------+ |     | +----------+ |
| QEMU/DPDK    |     | DPDK         |
+--------------+     +--------------+

- Generic vfio-dev. 
  It is the generic vfio framework in vfio applications like QEMU or DPDK.
  Applications can keep most of their vfio device management and plug in a
  vfio-user device type. Note that in the current implementation, we have not
  yet integrated the vfio-user client into kernel vfio in DPDK, but it is
  viable and good to do so.

- vfio-user client.
  For DPDK, it is the part of the librte_vfio_user implementation that provides
  ways to manipulate vfio-user based emulated devices. This manipulation is very
  similar to kernel vfio (i.e., syscalls like ioctl, mmap and pread/pwrite).
  It is the base for a vfio-user device consumer.

- vfio-user server. 
  It is the server part of librte_vfio_user. It provides ways to emulate your
  own device. A device provider only needs to care about the device layout that
  VFIO defines and does not need to know how it communicates with the vfio-user
  client.

- Emulated device.
  It is an emulated device of any type (e.g., network, crypto, etc.).

References
--
[1]: https://patchew.org/QEMU/20201130161229.23164-1-thanos.maka...@nutanix.com/
[2]: https://wiki.qemu.org/Features/MultiProcessQEMU
[3]: https://github.com/oracle/qemu/tree/vfio-user-v0.2

Chenbo Xia (9):
  lib: introduce vfio-user library
  vfio_user: implement lifecycle related APIs
  vfio_user: implement device and region related APIs
  vfio_user: implement DMA table and socket address API
  vfio_user: implement interrupt related APIs
  vfio_user: add client APIs of device attach/detach
  vfio_user: add client APIs of DMA/IRQ/region
  test/vfio_user: introduce functional test
  doc: add vfio-user library guide

 MAINTAINERS |4 +
 app/test/meson.build|4 +
 app/test/test_vfio_user.c   |  646 ++
 doc/guides/prog_guide/index.rst |1 +
 doc/guides/prog_guide/vfio_user_lib.rst |  215 
 doc/guides/rel_notes/release_21_02.rst  |   11 +
 lib/librte_vfio_user/meson.build|   11 +
 lib/librte_vfio_user/rte_vfio_user.h|  426 +++
 lib/librte_vfio_user/version.map|   26 +
 lib/librte_vfio_user/vfio_user_base.c   |  217 
 lib/librte_vfio_user/vfio_user_base.h   |  109 ++
 lib/librte_vfio_user/vfio_user_client.c |  691 ++
 lib/librte_vfio_user/vfio_user_client.h |   25 +
 lib/librte_vfio_user/vfio_user_server.c | 1553 +++
 lib/librte_vfio_user/vfio_user_server.h |   66 +
 lib/meson.build |1 +
 16 files changed, 4006 insertions(+)
 create mode 100644 app/test/test_vfio_user.c
 create mode 100644 doc/guides/prog_guide/vfio_user_lib.rst
 create mode 100644 lib/librte_vfio_user/meson.build
 create mode 100644 lib/librte_vfio_user/rte_vfio_user.h
 create mode 100644 lib/librte_vfio_user/version.map
 create mode 100644 lib/librte

[dpdk-dev] [PATCH 1/9] lib: introduce vfio-user library

2020-12-17 Thread Chenbo Xia
This patch introduces the vfio-user library, which follows vfio-user
protocol v1.0. As vfio-user has server and client implementations,
this patch introduces basic structures and internal functions that
will be used by both server and client.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 MAINTAINERS   |   4 +
 lib/librte_vfio_user/meson.build  |   9 ++
 lib/librte_vfio_user/version.map  |   3 +
 lib/librte_vfio_user/vfio_user_base.c | 205 ++
 lib/librte_vfio_user/vfio_user_base.h |  65 
 lib/meson.build   |   1 +
 6 files changed, 287 insertions(+)
 create mode 100644 lib/librte_vfio_user/meson.build
 create mode 100644 lib/librte_vfio_user/version.map
 create mode 100644 lib/librte_vfio_user/vfio_user_base.c
 create mode 100644 lib/librte_vfio_user/vfio_user_base.h

diff --git a/MAINTAINERS b/MAINTAINERS
index eafe9f8c46..5fb4880758 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1540,6 +1540,10 @@ M: Nithin Dabilpuram 
 M: Pavan Nikhilesh 
 F: lib/librte_node/
 
+Vfio-user - EXPERIMENTAL
+M: Chenbo Xia 
+M: Xiuchun Lu 
+F: lib/librte_vfio_user/
 
 Test Applications
 -
diff --git a/lib/librte_vfio_user/meson.build b/lib/librte_vfio_user/meson.build
new file mode 100644
index 00..0f6407b80f
--- /dev/null
+++ b/lib/librte_vfio_user/meson.build
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Intel Corporation
+
+if not is_linux
+   build = false
+   reason = 'only supported on Linux'
+endif
+
+sources = files('vfio_user_base.c')
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
new file mode 100644
index 00..33c1b976f1
--- /dev/null
+++ b/lib/librte_vfio_user/version.map
@@ -0,0 +1,3 @@
+EXPERIMENTAL {
+   local: *;
+};
diff --git a/lib/librte_vfio_user/vfio_user_base.c 
b/lib/librte_vfio_user/vfio_user_base.c
new file mode 100644
index 00..bbad553e0a
--- /dev/null
+++ b/lib/librte_vfio_user/vfio_user_base.c
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include "vfio_user_base.h"
+
+int vfio_user_log_level;
+
+const char *vfio_user_msg_str[VFIO_USER_MAX] = {
+   [VFIO_USER_NONE] = "VFIO_USER_NONE",
+   [VFIO_USER_VERSION] = "VFIO_USER_VERSION",
+};
+
+inline void vfio_user_close_msg_fds(VFIO_USER_MSG *msg)
+{
+   int i;
+
+   for (i = 0; i < msg->fd_num; i++)
+   close(msg->fds[i]);
+}
+
+int vfio_user_check_msg_fdnum(VFIO_USER_MSG *msg, int expected_fds)
+{
+   if (msg->fd_num == expected_fds)
+   return 0;
+
+   VFIO_USER_LOG(ERR, "Expect %d FDs for request %s, received %d\n",
+   expected_fds, vfio_user_msg_str[msg->cmd], msg->fd_num);
+
+   vfio_user_close_msg_fds(msg);
+
+   return -1;
+}
+
+static int vfio_user_recv_fd_msg(int sockfd, char *buf, int buflen, int *fds,
+   int max_fds, int *fd_num)
+{
+   struct iovec iov;
+   struct msghdr msgh;
+   char control[CMSG_SPACE(max_fds * sizeof(int))];
+   struct cmsghdr *cmsg;
+   int fd_sz, got_fds = 0;
+   int ret, i;
+
+   *fd_num = 0;
+
+   memset(&msgh, 0, sizeof(msgh));
+   iov.iov_base = buf;
+   iov.iov_len  = buflen;
+
+   msgh.msg_iov = &iov;
+   msgh.msg_iovlen = 1;
+   msgh.msg_control = control;
+   msgh.msg_controllen = sizeof(control);
+
+   ret = recvmsg(sockfd, &msgh, 0);
+   if (ret <= 0) {
+   if (ret)
+   VFIO_USER_LOG(DEBUG, "recvmsg failed\n");
+   return ret;
+   }
+
+   if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+   VFIO_USER_LOG(ERR, "Message is truncated\n");
+   return -1;
+   }
+
+   for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+   cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+   if ((cmsg->cmsg_level == SOL_SOCKET) &&
+   (cmsg->cmsg_type == SCM_RIGHTS)) {
+   fd_sz = cmsg->cmsg_len - CMSG_LEN(0);
+   got_fds = fd_sz / sizeof(int);
+   if (got_fds >= max_fds) {
+   /* Invalid message, close fds */
+   int *close_fd = (int *)CMSG_DATA(cmsg);
+   for (i = 0; i < got_fds; i++) {
+   close_fd += i;
+   close(*close_fd);
+   }
+   VFIO_USER_LOG(ERR, "fd num exceeds max "
+   "in vfio-user msg\n");
+   return -1;
+   }
+   *fd_num = got_fd

[dpdk-dev] [PATCH 2/9] vfio_user: implement lifecycle related APIs

2020-12-17 Thread Chenbo Xia
This patch implements three lifecycle related APIs for vfio-user server,
which are rte_vfio_user_register(), rte_vfio_user_unregister() and
rte_vfio_user_start(). Socket and device management is implemented
along with the API introduction.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/meson.build|   3 +-
 lib/librte_vfio_user/rte_vfio_user.h|  51 ++
 lib/librte_vfio_user/version.map|   6 +
 lib/librte_vfio_user/vfio_user_base.h   |   4 +
 lib/librte_vfio_user/vfio_user_server.c | 690 
 lib/librte_vfio_user/vfio_user_server.h |  55 ++
 6 files changed, 808 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_vfio_user/rte_vfio_user.h
 create mode 100644 lib/librte_vfio_user/vfio_user_server.c
 create mode 100644 lib/librte_vfio_user/vfio_user_server.h

diff --git a/lib/librte_vfio_user/meson.build b/lib/librte_vfio_user/meson.build
index 0f6407b80f..b7363f61c6 100644
--- a/lib/librte_vfio_user/meson.build
+++ b/lib/librte_vfio_user/meson.build
@@ -6,4 +6,5 @@ if not is_linux
reason = 'only supported on Linux'
 endif
 
-sources = files('vfio_user_base.c')
+sources = files('vfio_user_base.c', 'vfio_user_server.c')
+headers = files('rte_vfio_user.h')
diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
new file mode 100644
index 00..0d4f6c1be2
--- /dev/null
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_VFIO_USER_H
+#define _RTE_VFIO_USER_H
+
+#include 
+
+/**
+ *  Below APIs are for vfio-user server (device provider) to use:
+ * *rte_vfio_user_register
+ * *rte_vfio_user_unregister
+ * *rte_vfio_user_start
+ */
+
+/**
+ * Register a vfio-user device.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vfio_user_register(const char *sock_addr);
+
+/**
+ * Unregister a vfio-user device.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vfio_user_unregister(const char *sock_addr);
+
+/**
+ * Start vfio-user handling for the device.
+ *
+ * This function triggers vfio-user message handling.
+ * @param sock_addr
+ *   Unix domain socket address
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vfio_user_start(const char *sock_addr);
+
+#endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index 33c1b976f1..e53095eda8 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -1,3 +1,9 @@
 EXPERIMENTAL {
+   global:
+
+   rte_vfio_user_register;
+   rte_vfio_user_unregister;
+   rte_vfio_user_start;
+
local: *;
 };
diff --git a/lib/librte_vfio_user/vfio_user_base.h 
b/lib/librte_vfio_user/vfio_user_base.h
index 6db45b1819..926cecfa7a 100644
--- a/lib/librte_vfio_user/vfio_user_base.h
+++ b/lib/librte_vfio_user/vfio_user_base.h
@@ -7,6 +7,10 @@
 
 #include 
 
+#include "rte_vfio_user.h"
+
+#define VFIO_USER_VERSION_MAJOR 1
+#define VFIO_USER_VERSION_MINOR 0
 #define VFIO_USER_MAX_FD 1024
 #define VFIO_USER_MAX_VERSION_DATA 512
 
diff --git a/lib/librte_vfio_user/vfio_user_server.c 
b/lib/librte_vfio_user/vfio_user_server.c
new file mode 100644
index 00..545c779fb0
--- /dev/null
+++ b/lib/librte_vfio_user/vfio_user_server.c
@@ -0,0 +1,690 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vfio_user_server.h"
+
+#define MAX_VFIO_USER_DEVICE 1024
+
+static struct vfio_user_server *vfio_user_devices[MAX_VFIO_USER_DEVICE];
+static pthread_mutex_t vfio_dev_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static struct vfio_user_ep_sock vfio_ep_sock = {
+   .ep = {
+   .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
+   .fd_num = 0
+   },
+   .sock_num = 0,
+   .mutex = PTHREAD_MUTEX_INITIALIZER,
+};
+
+static int vfio_user_negotiate_version(struct vfio_user_server *dev,
+   VFIO_USER_MSG *msg)
+{
+   struct vfio_user_version *ver = &msg->payload.ver;
+
+   if (vfio_user_check_msg_fdnum(msg, 0) != 0)
+   return -EINVAL;
+
+   if (ver->major == dev->ver.major && ver->minor <= dev->ver.minor)
+   return 0;
+   else
+   return -ENOTSUP;
+}
+
+static vfio_user_msg_handler_t vfio_user_msg_handlers[VFIO_USER_MAX] = {
+   [VFIO_USER_NONE] = NULL,
+   [VFIO_USER_VERSION] = vfio_user_negotiate_version,
+};
+
+static struct vfio_user_server_socket *
+find_vfio_user_socket(const char *sock_addr)
+{
+   uint32_t i;
+
+   if (sock_addr == NULL)
+   return NULL;
+
+  

[dpdk-dev] [PATCH 3/9] vfio_user: implement device and region related APIs

2020-12-17 Thread Chenbo Xia
This patch introduces device and region related APIs, which are
rte_vfio_user_set_dev_info() and rte_vfio_user_set_reg_info().
The corresponding vfio-user command handling is also added with
the definition of all vfio-user command identity.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/rte_vfio_user.h|  60 ++
 lib/librte_vfio_user/version.map|   2 +
 lib/librte_vfio_user/vfio_user_base.c   |  12 ++
 lib/librte_vfio_user/vfio_user_base.h   |  32 +++-
 lib/librte_vfio_user/vfio_user_server.c | 232 
 lib/librte_vfio_user/vfio_user_server.h |   2 +
 6 files changed, 339 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
index 0d4f6c1be2..8a999c7aa0 100644
--- a/lib/librte_vfio_user/rte_vfio_user.h
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -5,13 +5,35 @@
 #ifndef _RTE_VFIO_USER_H
 #define _RTE_VFIO_USER_H
 
+#include 
+
 #include 
 
+struct rte_vfio_user_reg_info;
+
+typedef ssize_t (*rte_vfio_user_reg_acc_t)(struct rte_vfio_user_reg_info *reg,
+   char *buf, size_t count, loff_t pos, bool iswrite);
+
+struct rte_vfio_user_reg_info {
+   rte_vfio_user_reg_acc_t rw;
+   void *base;
+   int fd;
+   struct vfio_region_info *info;
+   void *priv;
+};
+
+struct rte_vfio_user_regions {
+   uint32_t reg_num;
+   struct rte_vfio_user_reg_info reg_info[];
+};
+
 /**
  *  Below APIs are for vfio-user server (device provider) to use:
  * *rte_vfio_user_register
  * *rte_vfio_user_unregister
  * *rte_vfio_user_start
+ * *rte_vfio_user_set_dev_info
+ * *rte_vfio_user_set_reg_info
  */
 
 /**
@@ -48,4 +70,42 @@ int rte_vfio_user_unregister(const char *sock_addr);
 __rte_experimental
 int rte_vfio_user_start(const char *sock_addr);
 
+/**
+ * Set the device information for a vfio-user device.
+ *
+ * This information must be set before calling rte_vfio_user_start, and should
+ * not be updated after start. Update after start can be done by unregistration
+ * and re-registration, and then the device-level change can be detected by
+ * vfio-user client.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @param dev_info
+ *   Device information for the vfio-user device
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vfio_user_set_dev_info(const char *sock_addr,
+   struct vfio_device_info *dev_info);
+
+/**
+ * Set the region information for a vfio-user device.
+ *
+ * This information must be set before calling rte_vfio_user_start, and should
+ * not be updated after start. Update after start can be done by unregistration
+ * and re-registration, and then the device-level change can be detected by
+ * vfio-user client.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @param reg
+ *   Region information for the vfio-user device
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vfio_user_set_reg_info(const char *sock_addr,
+   struct rte_vfio_user_regions *reg);
+
 #endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index e53095eda8..0f4f5acba5 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -4,6 +4,8 @@ EXPERIMENTAL {
rte_vfio_user_register;
rte_vfio_user_unregister;
rte_vfio_user_start;
+   rte_vfio_user_set_dev_info;
+   rte_vfio_user_set_reg_info;
 
local: *;
 };
diff --git a/lib/librte_vfio_user/vfio_user_base.c 
b/lib/librte_vfio_user/vfio_user_base.c
index bbad553e0a..4960589519 100644
--- a/lib/librte_vfio_user/vfio_user_base.c
+++ b/lib/librte_vfio_user/vfio_user_base.c
@@ -13,6 +13,18 @@ int vfio_user_log_level;
 const char *vfio_user_msg_str[VFIO_USER_MAX] = {
[VFIO_USER_NONE] = "VFIO_USER_NONE",
[VFIO_USER_VERSION] = "VFIO_USER_VERSION",
+   [VFIO_USER_DMA_MAP] = "VFIO_USER_DMA_MAP",
+   [VFIO_USER_DMA_UNMAP] = "VFIO_USER_DMA_UNMAP",
+   [VFIO_USER_DEVICE_GET_INFO] = "VFIO_USER_DEVICE_GET_INFO",
+   [VFIO_USER_DEVICE_GET_REGION_INFO] = "VFIO_USER_GET_REGION_INFO",
+   [VFIO_USER_DEVICE_GET_IRQ_INFO] = "VFIO_USER_DEVICE_GET_IRQ_INFO",
+   [VFIO_USER_DEVICE_SET_IRQS] = "VFIO_USER_DEVICE_SET_IRQS",
+   [VFIO_USER_REGION_READ] = "VFIO_USER_REGION_READ",
+   [VFIO_USER_REGION_WRITE] = "VFIO_USER_REGION_WRITE",
+   [VFIO_USER_DMA_READ] = "VFIO_USER_DMA_READ",
+   [VFIO_USER_DMA_WRITE] = "VFIO_USER_DMA_WRITE",
+   [VFIO_USER_VM_INTERRUPT] = "VFIO_USER_VM_INTERRUPT",
+   [VFIO_USER_DEVICE_RESET] = "VFIO_USER_DEVICE_RESET",
 };
 
 inline void vfio_user_close_msg_fds(VFIO_USER_MSG *msg)
diff --git a/lib/librte_vfio_user/vfio_user_base.h 
b/lib/librte_vfio_user/vfio_user_base.h
index 926cecfa7a..0d8abde816 100644
--

[dpdk-dev] [PATCH 4/9] vfio_user: implement DMA table and socket address API

2020-12-17 Thread Chenbo Xia
This patch introduces an API called rte_vfio_user_get_mem_table()
for emulated devices to acquire DMA memory table from vfio-user
library.

Notify operations are also introduced to notify the emulated
devices of several events. Another socket address API is introduced
for translation between device ID and socket address in notify
callbacks.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/rte_vfio_user.h|  75 -
 lib/librte_vfio_user/version.map|   2 +
 lib/librte_vfio_user/vfio_user_base.h   |   2 +
 lib/librte_vfio_user/vfio_user_server.c | 363 +++-
 lib/librte_vfio_user/vfio_user_server.h |   3 +
 5 files changed, 437 insertions(+), 8 deletions(-)

diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
index 8a999c7aa0..044c43e7dc 100644
--- a/lib/librte_vfio_user/rte_vfio_user.h
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -5,10 +5,52 @@
 #ifndef _RTE_VFIO_USER_H
 #define _RTE_VFIO_USER_H
 
+#include 
+#include 
+#include 
 #include 
+#include 
 
 #include 
 
+#define RTE_VUSER_MAX_DMA 256
+
+struct rte_vfio_user_notify_ops {
+   /* Add device */
+   int (*new_device)(int dev_id);
+   /* Remove device */
+   void (*destroy_device)(int dev_id);
+   /* Update device status */
+   int (*update_status)(int dev_id);
+   /* Lock or unlock data path */
+   int (*lock_dp)(int dev_id, int lock);
+   /* Reset device */
+   int (*reset_device)(int dev_id);
+};
+
+struct rte_vfio_user_mem_reg {
+   uint64_t gpa;
+   uint64_t size;
+   uint64_t fd_offset;
+   uint32_t protection;/* attributes in  */
+#define RTE_VUSER_MEM_MAPPABLE (0x1 << 0)
+   uint32_t flags;
+};
+
+struct rte_vfio_user_mtb_entry {
+   uint64_t gpa;
+   uint64_t size;
+   uint64_t host_user_addr;
+   void *mmap_addr;
+   uint64_t mmap_size;
+   int fd;
+};
+
+struct rte_vfio_user_mem {
+   uint32_t entry_num;
+   struct rte_vfio_user_mtb_entry entry[RTE_VUSER_MAX_DMA];
+};
+
 struct rte_vfio_user_reg_info;
 
 typedef ssize_t (*rte_vfio_user_reg_acc_t)(struct rte_vfio_user_reg_info *reg,
@@ -32,6 +74,8 @@ struct rte_vfio_user_regions {
  * *rte_vfio_user_register
  * *rte_vfio_user_unregister
  * *rte_vfio_user_start
+ * *rte_vfio_get_sock_addr
+ * *rte_vfio_user_get_mem_table
  * *rte_vfio_user_set_dev_info
  * *rte_vfio_user_set_reg_info
  */
@@ -41,11 +85,14 @@ struct rte_vfio_user_regions {
  *
  * @param sock_addr
  *   Unix domain socket address
+ * @param ops
+ *   Notify ops for the device
  * @return
  *   0 on success, -1 on failure
  */
 __rte_experimental
-int rte_vfio_user_register(const char *sock_addr);
+int rte_vfio_user_register(const char *sock_addr,
+   const struct rte_vfio_user_notify_ops *ops);
 
 /**
  * Unregister a vfio-user device.
@@ -70,6 +117,17 @@ int rte_vfio_user_unregister(const char *sock_addr);
 __rte_experimental
 int rte_vfio_user_start(const char *sock_addr);
 
+/**
+ * Get the memory table of a vfio-user device.
+ *
+ * @param dev_id
+ *   Vfio-user device ID
+ * @return
+ *   Pointer to memory table on success, NULL on failure
+ */
+__rte_experimental
+const struct rte_vfio_user_mem *rte_vfio_user_get_mem_table(int dev_id);
+
 /**
  * Set the device information for a vfio-user device.
  *
@@ -108,4 +166,19 @@ __rte_experimental
 int rte_vfio_user_set_reg_info(const char *sock_addr,
struct rte_vfio_user_regions *reg);
 
+/**
+ * Get the socket address for a vfio-user device.
+ *
+ * @param dev_id
+ *   Vfio-user device ID
+ * @param[out] buf
+ *   Buffer to store socket address
+ * @param len
+ *   The length of the buffer
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vfio_get_sock_addr(int dev_id, char *buf, size_t len);
+
 #endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index 0f4f5acba5..3a50b5ef0e 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -4,6 +4,8 @@ EXPERIMENTAL {
rte_vfio_user_register;
rte_vfio_user_unregister;
rte_vfio_user_start;
+   rte_vfio_get_sock_addr;
+   rte_vfio_user_get_mem_table;
rte_vfio_user_set_dev_info;
rte_vfio_user_set_reg_info;
 
diff --git a/lib/librte_vfio_user/vfio_user_base.h 
b/lib/librte_vfio_user/vfio_user_base.h
index 0d8abde816..5f5e651e87 100644
--- a/lib/librte_vfio_user/vfio_user_base.h
+++ b/lib/librte_vfio_user/vfio_user_base.h
@@ -9,6 +9,7 @@
 
 #include "rte_vfio_user.h"
 
+#define VFIO_USER_MSG_MAX_NREG 8
 #define VFIO_USER_VERSION_MAJOR 1
 #define VFIO_USER_VERSION_MINOR 0
 #define VFIO_USER_MAX_RSVD 512
@@ -79,6 +80,7 @@ typedef struct vfio_user_msg {
uint32_t err;   /* Valid in reply, optional */
union {
struct vfio_user_version ver;
+   struct rte_vfio_use

[dpdk-dev] [PATCH 5/9] vfio_user: implement interrupt related APIs

2020-12-17 Thread Chenbo Xia
This patch implements two interrupt related APIs, which are
rte_vfio_user_get_irq() and rte_vfio_user_set_irq_info().
The former is for devices to get interrupt configuration
(e.g., irqfds). The latter is for setting interrupt information
before vfio-user starts.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/rte_vfio_user.h|  44 
 lib/librte_vfio_user/version.map|   2 +
 lib/librte_vfio_user/vfio_user_base.h   |   8 +
 lib/librte_vfio_user/vfio_user_server.c | 292 +++-
 lib/librte_vfio_user/vfio_user_server.h |   6 +
 5 files changed, 347 insertions(+), 5 deletions(-)

diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
index 044c43e7dc..6c12b0b6ed 100644
--- a/lib/librte_vfio_user/rte_vfio_user.h
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -69,6 +69,11 @@ struct rte_vfio_user_regions {
struct rte_vfio_user_reg_info reg_info[];
 };
 
+struct rte_vfio_user_irq_info {
+   uint32_t irq_num;
+   struct vfio_irq_info irq_info[];
+};
+
 /**
  *  Below APIs are for vfio-user server (device provider) to use:
  * *rte_vfio_user_register
@@ -76,8 +81,10 @@ struct rte_vfio_user_regions {
  * *rte_vfio_user_start
  * *rte_vfio_get_sock_addr
  * *rte_vfio_user_get_mem_table
+ * *rte_vfio_user_get_irq
  * *rte_vfio_user_set_dev_info
  * *rte_vfio_user_set_reg_info
+ * *rte_vfio_user_set_irq_info
  */
 
 /**
@@ -181,4 +188,41 @@ int rte_vfio_user_set_reg_info(const char *sock_addr,
 __rte_experimental
 int rte_vfio_get_sock_addr(int dev_id, char *buf, size_t len);
 
+/**
+ * Get the irqfds of a vfio-user device.
+ *
+ * @param dev_id
+ *   Vfio-user device ID
+ * @param index
+ *   irq index
+ * @param count
+ *   irq count
+ * @param[out] fds
+ *   Pointer to the irqfds
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vfio_user_get_irq(int dev_id, uint32_t index, uint32_t count,
+   int *fds);
+
+/**
+ * Set the irq information for a vfio-user device.
+ *
+ * This information must be set before calling rte_vfio_user_start, and should
+ * not be updated after start. Update after start can be done by unregistration
+ * and re-registration, and then the device-level change can be detected by
+ * vfio-user client.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @param irq
+ *   IRQ information for the vfio-user device
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vfio_user_set_irq_info(const char *sock_addr,
+   struct rte_vfio_user_irq_info *irq);
+
 #endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index 3a50b5ef0e..621a51a9fc 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -6,8 +6,10 @@ EXPERIMENTAL {
rte_vfio_user_start;
rte_vfio_get_sock_addr;
rte_vfio_user_get_mem_table;
+   rte_vfio_user_get_irq;
rte_vfio_user_set_dev_info;
rte_vfio_user_set_reg_info;
+   rte_vfio_user_set_irq_info;
 
local: *;
 };
diff --git a/lib/librte_vfio_user/vfio_user_base.h 
b/lib/librte_vfio_user/vfio_user_base.h
index 5f5e651e87..7fed52a44e 100644
--- a/lib/librte_vfio_user/vfio_user_base.h
+++ b/lib/librte_vfio_user/vfio_user_base.h
@@ -61,6 +61,12 @@ struct vfio_user_reg {
uint8_t rsvd[VFIO_USER_MAX_RSVD];
 };
 
+struct vfio_user_irq_set {
+   struct vfio_irq_set set;
+   /* Reserved for data of irq set */
+   uint8_t rsvd[VFIO_USER_MAX_RSVD];
+};
+
 struct vfio_user_reg_rw {
uint64_t reg_offset;
uint32_t reg_idx;
@@ -83,6 +89,8 @@ typedef struct vfio_user_msg {
struct rte_vfio_user_mem_reg memory[VFIO_USER_MSG_MAX_NREG];
struct vfio_device_info dev_info;
struct vfio_user_reg reg_info;
+   struct vfio_irq_info irq_info;
+   struct vfio_user_irq_set irq_set;
struct vfio_user_reg_rw reg_rw;
} payload;
int fds[VFIO_USER_MAX_FD];
diff --git a/lib/librte_vfio_user/vfio_user_server.c 
b/lib/librte_vfio_user/vfio_user_server.c
index 1162e463b7..cbaf3b5ed5 100644
--- a/lib/librte_vfio_user/vfio_user_server.c
+++ b/lib/librte_vfio_user/vfio_user_server.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "vfio_user_server.h"
 
@@ -301,6 +302,146 @@ static int vfio_user_device_get_reg_info(struct 
vfio_user_server *dev,
return 0;
 }
 
+static int vfio_user_device_get_irq_info(struct vfio_user_server *dev,
+   VFIO_USER_MSG *msg)
+{
+   struct vfio_irq_info *irq_info = &msg->payload.irq_info;
+   struct rte_vfio_user_irq_info *info = dev->irqs.info;
+   uint32_t i;
+
+   if (vfio_user_check_msg_fdnum(msg, 0) != 0)
+   return -EINVAL;
+
+   for (i = 0; i < info->irq_num; i++) {
+   if (irq_info->index == info->irq_info[i].index) {

[dpdk-dev] [PATCH 6/9] vfio_user: add client APIs of device attach/detach

2020-12-17 Thread Chenbo Xia
This patch implements two APIs, rte_vfio_user_attach_dev() and
rte_vfio_user_detach_dev() for vfio-user client to connect to
or disconnect from a vfio-user device on server side.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/meson.build|   3 +-
 lib/librte_vfio_user/rte_vfio_user.h|  30 +++
 lib/librte_vfio_user/version.map|   2 +
 lib/librte_vfio_user/vfio_user_client.c | 279 
 lib/librte_vfio_user/vfio_user_client.h |  25 +++
 5 files changed, 338 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_vfio_user/vfio_user_client.c
 create mode 100644 lib/librte_vfio_user/vfio_user_client.h

diff --git a/lib/librte_vfio_user/meson.build b/lib/librte_vfio_user/meson.build
index b7363f61c6..5761f0edd1 100644
--- a/lib/librte_vfio_user/meson.build
+++ b/lib/librte_vfio_user/meson.build
@@ -6,5 +6,6 @@ if not is_linux
reason = 'only supported on Linux'
 endif
 
-sources = files('vfio_user_base.c', 'vfio_user_server.c')
+sources = files('vfio_user_base.c', 'vfio_user_server.c',
+   'vfio_user_client.c')
 headers = files('rte_vfio_user.h')
diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
index 6c12b0b6ed..b09d83e224 100644
--- a/lib/librte_vfio_user/rte_vfio_user.h
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -225,4 +225,34 @@ __rte_experimental
 int rte_vfio_user_set_irq_info(const char *sock_addr,
struct rte_vfio_user_irq_info *irq);
 
+/**
+ *  Below APIs are for vfio-user client (device consumer) to use:
+ * *rte_vfio_user_attach_dev
+ * *rte_vfio_user_detach_dev
+ */
+
+/**
+ * Attach to a vfio-user device.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @return
+ *   - >=0: Success, device attached. Returned value is the device ID.
+ *   - <0: Failure on device attach
+ */
+__rte_experimental
+int rte_vfio_user_attach_dev(const char *sock_addr);
+
+/**
+ * Detach from a vfio-user device.
+ *
+ * @param dev_id
+ *   Device ID of the vfio-user device
+ * @return
+ *   - 0: Success, device detached
+ *   - <0: Failure on device detach
+ */
+__rte_experimental
+int rte_vfio_user_detach_dev(int dev_id);
+
 #endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index 621a51a9fc..a0cda2b49c 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -10,6 +10,8 @@ EXPERIMENTAL {
rte_vfio_user_set_dev_info;
rte_vfio_user_set_reg_info;
rte_vfio_user_set_irq_info;
+   rte_vfio_user_attach_dev;
+   rte_vfio_user_detach_dev;
 
local: *;
 };
diff --git a/lib/librte_vfio_user/vfio_user_client.c 
b/lib/librte_vfio_user/vfio_user_client.c
new file mode 100644
index 00..85b2e8cb46
--- /dev/null
+++ b/lib/librte_vfio_user/vfio_user_client.c
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vfio_user_client.h"
+#include "rte_vfio_user.h"
+
+#define REPLY_USEC 1000
+#define RECV_MAX_TRY 50
+
+static struct vfio_user_client_devs vfio_client_devs = {
+   .cl_num = 0,
+   .mutex = PTHREAD_MUTEX_INITIALIZER,
+};
+
+/* Check if the sock_addr exists. If not, alloc and return index */
+static int vfio_user_client_allocate(const char *sock_addr)
+{
+   uint32_t i, count = 0;
+   int index = -1;
+
+   if (sock_addr == NULL)
+   return -1;
+
+   if (vfio_client_devs.cl_num == 0)
+   return 0;
+
+   for (i = 0; i < MAX_VFIO_USER_CLIENT; i++) {
+   struct vfio_user_client *cl = vfio_client_devs.cl[i];
+
+   if (!cl) {
+   if (index == -1)
+   index = i;
+   continue;
+   }
+
+   if (!strcmp(cl->sock.sock_addr, sock_addr))
+   return -1;
+
+   count++;
+   if (count == vfio_client_devs.cl_num)
+   break;
+   }
+
+   return index;
+}
+
+static struct vfio_user_client *
+vfio_user_client_create_dev(const char *sock_addr)
+{
+   struct vfio_user_client *cl;
+   struct vfio_user_socket *sock;
+   int fd, idx;
+   struct sockaddr_un un;
+
+   pthread_mutex_lock(&vfio_client_devs.mutex);
+   if (vfio_client_devs.cl_num == MAX_VFIO_USER_CLIENT) {
+   VFIO_USER_LOG(ERR, "Failed to create client:"
+   " client num reaches max\n");
+   goto err;
+   }
+
+   idx = vfio_user_client_allocate(sock_addr);
+   if (idx < 0) {
+   VFIO_USER_LOG(ERR, "Failed to create client:"
+   "socket addr exists\n");
+   goto err;
+

[dpdk-dev] [PATCH 7/9] vfio_user: add client APIs of DMA/IRQ/region

2020-12-17 Thread Chenbo Xia
This patch introduces nine APIs
- Device related:
  rte_vfio_user_get_dev_info and rte_vfio_user_reset
- DMA related:
  rte_vfio_user_dma_map/unmap
- Region related:
  rte_vfio_user_get_reg_info and rte_vfio_user_region_read/write
- IRQ related:
  rte_vfio_user_get_irq_info and rte_vfio_user_set_irqs

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/rte_vfio_user.h| 168 ++
 lib/librte_vfio_user/version.map|   9 +
 lib/librte_vfio_user/vfio_user_client.c | 412 
 3 files changed, 589 insertions(+)

diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
index b09d83e224..fe27d05992 100644
--- a/lib/librte_vfio_user/rte_vfio_user.h
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -229,6 +229,15 @@ int rte_vfio_user_set_irq_info(const char *sock_addr,
  *  Below APIs are for vfio-user client (device consumer) to use:
  * *rte_vfio_user_attach_dev
  * *rte_vfio_user_detach_dev
+ * *rte_vfio_user_get_dev_info
+ * *rte_vfio_user_get_reg_info
+ * *rte_vfio_user_get_irq_info
+ * *rte_vfio_user_dma_map
+ * *rte_vfio_user_dma_unmap
+ * *rte_vfio_user_set_irqs
+ * *rte_vfio_user_region_read
+ * *rte_vfio_user_region_write
+ * *rte_vfio_user_reset
  */
 
 /**
@@ -255,4 +264,163 @@ int rte_vfio_user_attach_dev(const char *sock_addr);
 __rte_experimental
 int rte_vfio_user_detach_dev(int dev_id);
 
+/**
+ * Get device information of a vfio-user device.
+ *
+ * @param dev_id
+ *   Device ID of the vfio-user device
+ * @param[out] info
+ *   A pointer to a structure of type *vfio_device_info* to be filled with the
+ *   information of the device.
+ * @return
+ *   - 0: Success, device information updated
+ *   - <0: Failure on get device information
+ */
+__rte_experimental
+int rte_vfio_user_get_dev_info(int dev_id, struct vfio_device_info *info);
+
+/**
+ * Get region information of a vfio-user device.
+ *
+ * @param dev_id
+ *   Device ID of the vfio-user device
+ * @param[out] info
+ *   A pointer to a structure of type *vfio_region_info* to be filled with the
+ *   information of the device region.
+ * @param[out] fd
+ *   A pointer to the file descriptor of the region
+ * @return
+ *   - 0: Success, region information and file descriptor updated. If the region
+ *can not be mmaped, the file descriptor should be -1.
+ *   - <0: Failure on get region information
+ */
+__rte_experimental
+int rte_vfio_user_get_reg_info(int dev_id, struct vfio_region_info *info,
+   int *fd);
+
+/**
+ * Get IRQ information of a vfio-user device.
+ *
+ * @param dev_id
+ *   Device ID of the vfio-user device
+ * @param[out] info
+ *   A pointer to a structure of type *vfio_irq_info* to be filled with the
+ *   information of the IRQ.
+ * @return
+ *   - 0: Success, IRQ information updated
+ *   - <0: Failure on get IRQ information
+ */
+__rte_experimental
+int rte_vfio_user_get_irq_info(int dev_id, struct vfio_irq_info *info);
+
+/**
+ * Map DMA regions for the vfio-user device.
+ *
+ * @param dev_id
+ *   Device ID of the vfio-user device
+ * @param mem
+ *   A pointer to a structure of type *vfio_user_mem_reg* that identifies
+ *   one or several DMA regions.
+ * @param fds
+ *   A pointer to a list of file descriptors. One file descriptor maps to
+ *   one DMA region.
+ * @param num
+ *   Number of DMA regions (or file descriptors)
+ * @return
+ *   - 0: Success, all DMA regions are mapped.
+ *   - <0: Failure on DMA map. It should be assumed that all DMA regions
+ * are not mapped.
+ */
+__rte_experimental
+int rte_vfio_user_dma_map(int dev_id, struct rte_vfio_user_mem_reg *mem,
+   int *fds, uint32_t num);
+
+/**
+ * Unmap DMA regions for the vfio-user device.
+ *
+ * @param dev_id
+ *   Device ID of the vfio-user device
+ * @param mem
+ *   A pointer to a structure of type *vfio_user_mem_reg* that identifies
+ *   one or several DMA regions.
+ * @param num
+ *   Number of DMA regions
+ * @return
+ *   - 0: Success, all DMA regions are unmapped.
+ *   - <0: Failure on DMA unmap. It should be assumed that all DMA regions
+ * are not unmapped.
+ */
+__rte_experimental
+int rte_vfio_user_dma_unmap(int dev_id, struct rte_vfio_user_mem_reg *mem,
+   uint32_t num);
+
+/**
+ * Set interrupt signaling, masking, and unmasking for the vfio-user device.
+ *
+ * @param dev_id
+ *   Device ID of the vfio-user device
+ * @param set
+ *   A pointer to a structure of type *vfio_irq_set* that specifies the set
+ *   data and action
+ * @return
+ *   - 0: Success, IRQs are set successfully.
+ *   - <0: Failure on IRQ set.
+ */
+__rte_experimental
+int rte_vfio_user_set_irqs(int dev_id, struct vfio_irq_set *set);
+
+/**
+ * Read region of the vfio-user device.
+ *
+ * @param dev_id
+ *   Device ID of the vfio-user device
+ * @param idx
+ *   The region index
+ * @param offset
+ *   The region offset
+ * @param size
+ *   Size of the read data
+ 

[dpdk-dev] [PATCH 8/9] test/vfio_user: introduce functional test

2020-12-17 Thread Chenbo Xia
This patch introduces functional test for vfio_user client and
server. Note that the test can only be run with server and client
both started and server should be started first.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 app/test/meson.build  |   4 +
 app/test/test_vfio_user.c | 646 ++
 2 files changed, 650 insertions(+)
 create mode 100644 app/test/test_vfio_user.c

diff --git a/app/test/meson.build b/app/test/meson.build
index 94fd39fecb..f5b15ac44c 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -138,6 +138,7 @@ test_sources = files('commands.c',
'test_trace.c',
'test_trace_register.c',
'test_trace_perf.c',
+   'test_vfio_user.c',
'test_version.c',
'virtual_pmd.c'
 )
@@ -173,6 +174,7 @@ test_deps = ['acl',
'ring',
'security',
'stack',
+   'vfio_user',
'telemetry',
'timer'
 ]
@@ -266,6 +268,8 @@ fast_tests = [
 ['service_autotest', true],
 ['thash_autotest', true],
 ['trace_autotest', true],
+['vfio_user_autotest_client', false],
+['vfio_user_autotest_server', false],
 ]
 
 perf_test_names = [
diff --git a/app/test/test_vfio_user.c b/app/test/test_vfio_user.c
new file mode 100644
index 00..ee245e437d
--- /dev/null
+++ b/app/test/test_vfio_user.c
@@ -0,0 +1,646 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test.h"
+
+#define REGION_SIZE 0x100
+
+struct server_mem_tb {
+   uint32_t entry_num;
+   struct rte_vfio_user_mtb_entry entry[];
+};
+
+static const char test_sock[] = "/tmp/dpdk_vfio_test";
+struct server_mem_tb *server_mem;
+int server_irqfd;
+atomic_uint test_failed;
+atomic_uint server_destroyed;
+
+static int test_set_dev_info(const char *sock,
+   struct vfio_device_info *info)
+{
+   int ret;
+
+   info->argsz = sizeof(*info);
+   info->flags = VFIO_DEVICE_FLAGS_RESET | VFIO_DEVICE_FLAGS_PCI;
+   info->num_irqs = VFIO_PCI_NUM_IRQS;
+   info->num_regions = VFIO_PCI_NUM_REGIONS;
+   ret = rte_vfio_user_set_dev_info(sock, info);
+   if (ret) {
+   printf("Failed to set device info\n");
+   return -1;
+   }
+
+   return 0;
+}
+
+static ssize_t test_dev_cfg_rw(struct rte_vfio_user_reg_info *reg, char *buf,
+   size_t count, loff_t pos, bool iswrite)
+{
+   char *loc = (char *)reg->base + pos;
+
+   if (!iswrite) {
+   if (pos + count > reg->info->size)
+   return -1;
+   memcpy(buf, loc, count);
+   return count;
+   }
+
+   memcpy(loc, buf, count);
+   return count;
+}
+
+static int test_set_reg_info(const char *sock_addr,
+   struct rte_vfio_user_regions *reg)
+{
+   struct rte_vfio_user_reg_info *reg_info;
+   void *cfg_base = NULL;
+   uint32_t i, j, sz = 0, reg_sz = REGION_SIZE;
+   int ret;
+
+   reg->reg_num = VFIO_PCI_NUM_REGIONS;
+   sz = sizeof(struct vfio_region_info);
+
+   for (i = 0; i < reg->reg_num; i++) {
+   reg_info = ®->reg_info[i];
+
+   reg_info->info = rte_zmalloc(NULL, sz, 0);
+   if (!reg_info->info) {
+   printf("Failed to alloc vfio region info\n");
+   goto err;
+   }
+
+   reg_info->priv = NULL;
+   reg_info->fd = -1;
+   reg_info->info->argsz = sz;
+   reg_info->info->cap_offset = sz;
+   reg_info->info->index = i;
+   reg_info->info->offset = 0;
+   reg_info->info->flags = VFIO_REGION_INFO_FLAG_READ |
+   VFIO_REGION_INFO_FLAG_WRITE;
+
+   if (i == VFIO_PCI_CONFIG_REGION_INDEX) {
+   cfg_base = rte_zmalloc(NULL, reg_sz, 0);
+   if (!cfg_base) {
+   printf("Failed to alloc cfg space\n");
+   goto err;
+   }
+   reg_info->base = cfg_base;
+   reg_info->rw = test_dev_cfg_rw;
+   reg_info->info->size = reg_sz;
+   } else {
+   reg_info->base = NULL;
+   reg_info->rw = NULL;
+   reg_info->info->size = 0;
+   }
+   }
+
+   ret = rte_vfio_user_set_reg_info(sock_addr, reg);
+   if (ret) {
+   pri

[dpdk-dev] [PATCH 9/9] doc: add vfio-user library guide

2020-12-17 Thread Chenbo Xia
Add vfio-user library guide and update release notes.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 doc/guides/prog_guide/index.rst |   1 +
 doc/guides/prog_guide/vfio_user_lib.rst | 215 
 doc/guides/rel_notes/release_21_02.rst  |  11 ++
 3 files changed, 227 insertions(+)
 create mode 100644 doc/guides/prog_guide/vfio_user_lib.rst

diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst
index 45c7dec88d..f9847b1058 100644
--- a/doc/guides/prog_guide/index.rst
+++ b/doc/guides/prog_guide/index.rst
@@ -70,3 +70,4 @@ Programmer's Guide
 lto
 profile_app
 glossary
+vfio_user_lib
diff --git a/doc/guides/prog_guide/vfio_user_lib.rst 
b/doc/guides/prog_guide/vfio_user_lib.rst
new file mode 100644
index 00..6daec4d8e5
--- /dev/null
+++ b/doc/guides/prog_guide/vfio_user_lib.rst
@@ -0,0 +1,215 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+Copyright(c) 2020 Intel Corporation.
+
+Vfio User Library
+=================
+
+The vfio-user library implements the vfio-user protocol, which is a protocol
+that allows an I/O device to be emulated in a separate process outside of a
+Virtual Machine Monitor (VMM). The protocol has a client/server model, in which
+the server emulates the device and the client (e.g., VMM) consumes the device.
+Vfio-user library uses the device model of Linux kernel VFIO and core concepts
+defined in its API. The main difference between kernel VFIO and vfio-user is
+that the device consumer uses messages over a UNIX domain socket instead of
+system calls in vfio-user.
+
+The vfio-user library is used to construct and consume emulated devices. The
+server side implementation is mainly for construction of devices and the client
+side implementation is mainly for consumption and manipulation of devices. You
+use server APIs mainly for two things: provide the device information (e.g.,
+region/irq information) to vfio-user library and acquire the configuration
+(e.g., DMA table) from client. To construct a device, you only need to focus on
+the device abstraction that vfio-user library defines rather than how the
+server side communicates with client. You use client APIs mainly for acquiring
+the device information and configuring the device. The client API usage is
+almost the same as the kernel VFIO ioctl.
+
+
+Vfio User Server API Overview
+-----------------------------
+
+The following is an overview of key Vfio User Server API functions. You will
+know how to build an emulated device with this overview.
+
+There are mainly four steps of using Vfio User API to build your device:
+
+1. Register
+
+This step includes one API in Vfio User.
+
+* ``rte_vfio_user_register(sock_addr, notify_ops)``
+
+  This function registers a session to communicate with vfio-user client. A
+  session maps to one device so that a device instance will be created upon
+  registration.
+
+  ``sock_addr`` specifies the Unix domain socket file path and is the identity
+  of the session.
+
+  ``notify_ops`` is a set of callbacks for vfio-user library to notify
+  emulated device. Currently, there are five callbacks:
+
+  - ``new_device``
+This callback is invoked when the device is configured and becomes ready.
+The dev_id is for vfio-user library to identify one unique device.
+
+  - ``destroy_device``
+This callback is invoked when the device is destroyed. In most cases, it
+means the client is disconnected from the server.
+
+  - ``update_status``
+This callback is invoked when the device configuration is updated (e.g.,
+DMA table/IRQ update)
+
+  - ``lock_dp``
+This callback is invoked when data path needs to be locked or unlocked.
+
+  - ``reset_device``
+This callback is invoked when the emulated device needs to be reset.
+
+2. Set device information
+
+This step includes three APIs in Vfio User.
+
+* ``rte_vfio_user_set_dev_info(sock_addr, dev_info)``
+
+  This function sets the device information to vfio-user library. The device
+  information is defined in Linux VFIO which mainly consists of device type
+  and the number of vfio regions and IRQs.
+
+* ``rte_vfio_user_set_reg_info(sock_addr, reg)``
+
+  This function sets the vfio region information to vfio-user library. Regions
+  should be created before using this API. The information mainly includes the
+  process virtual address, size, file descriptor, attributes and capabilities of
+  regions.
+
+* ``rte_vfio_user_set_irq_info(sock_addr, irq)``
+
+  This function sets the IRQ information to vfio-user library. The information
+  includes how many IRQ types the device supports (e.g., MSI/MSI-X) and the IRQ
+  count of each type.
+
+3. Start
+
+This step includes one API in Vfio User.
+
+* ``rte_vfio_user_start(sock_addr)``
+
+  This function starts the registered session with vfio-user client. This means
+  a control thread will start to listen and handle messages sent from the client.
+  Note that only one thread is created for all vfio-user based de

[dpdk-dev] [PATCH 0/8] Introduce emudev library and iavf emudev driver

2020-12-18 Thread Chenbo Xia
This series introduces a new device abstraction called emudev for emulated
devices. A new library (librte_emudev) is implemented. The first emudev
driver is also introduced, which emulates Intel Adaptive Virtual Function
(iavf) as a software network device.

This series has a dependency on librte_vfio_user patch series:
http://patchwork.dpdk.org/cover/85389/

Background & Motivation 
---
The disaggregated/multi-process QEMU is using VFIO-over-socket/vfio-user
as the main transport mechanism to disaggregate IO services from QEMU.
Therefore, librte_vfio_user is introduced in DPDK to accommodate
emulated devices for high performance I/O. Although vfio-user library
provides possibility of emulating devices in DPDK, DPDK does not have
a device abstraction for emulated devices. A good device abstraction will
be useful for applications or high performance data path driver. With
this consideration, emudev library is designed and implemented. It also
makes it possible to keep modular design on emulated devices by implementing
data path related logic in a standalone driver (e.g., an ethdev driver)
and keeps the unrelated logic in the emudev driver.

Design overview
---

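                    +---------------------------------------+
                    |   +---------------+    +-----------+  |
                    |   |  iavf_emudev  |<-->| data path |  |
                    |   |    driver     |    |   driver  |  |
                    |   +---------------+    +-----------+  |
                    |           |                           |
                    | ------------------------- VDEV BUS    |
                    |           |                           |
                    |   +---------------+                   |
+--------------+    |   | vdev:         |                   |
| +----------+ |    |   | /path/to/vfio |                   |
| | Generic  | |    |   +---------------+                   |
| | vfio-dev | |    |           |                           |
| +----------+ |    |           |                           |
| +----------+ |    |      +----------+                     |
| | vfio-user| |    |      | vfio-user|                     |
| | client   | |<---|----->| server   |                     |
| +----------+ |    |      +----------+                     |
| QEMU/DPDK    |    | DPDK                                  |
+--------------+    +---------------------------------------+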

- Generic vfio-dev/vfio-user client/vfio-user server
  Above concepts are all introduced in librte_vfio_user patch series:
  http://patchwork.dpdk.org/cover/85389/

- vdev:/path/to/vfio.
  It binds to vdev bus driver. The vdev device is defined by DPDK applications
  through command line as '--vdev=emu_iavf, path=/path/to/socket' in iavf_emudev
  case. Parameters in command line include device name (emu_iavf) which is used
  to identify corresponding driver (in this case, iavf_emudev driver),
  path=/path/to/socket which is used to open the transport interface to
  vfio-user client in QEMU/DPDK.

- data path driver.
  The data path handling is split into another standalone driver for modular
  design.


Chenbo Xia (8):
  lib: introduce emudev library
  doc: add emudev library guide
  emu: introduce emulated iavf driver
  emu/iavf: add vfio-user device register and unregister
  emu/iavf: add resource management and internal logic of iavf
  emu/iavf: add emudev operations to fit in emudev framework
  test/emudev: introduce functional test
  doc: update release notes for iavf emudev driver

 MAINTAINERS|   12 +
 app/test/meson.build   |5 +-
 app/test/test_emudev.c |   29 +
 doc/guides/prog_guide/emudev.rst   |  122 +++
 doc/guides/prog_guide/index.rst|1 +
 doc/guides/rel_notes/release_21_02.rst |   16 +
 drivers/emu/iavf/iavf_emu.c|  250 ++
 drivers/emu/iavf/iavf_emu_internal.h   |   69 ++
 drivers/emu/iavf/iavf_emu_test.c   |  174 
 drivers/emu/iavf/iavf_emudev.c |  237 ++
 drivers/emu/iavf/iavf_vfio_user.c  | 1053 
 drivers/emu/iavf/iavf_vfio_user.h  |   57 ++
 drivers/emu/iavf/meson.build   |   17 +
 drivers/emu/iavf/rte_iavf_emu.h|  119 +++
 drivers/emu/iavf/version.map   |3 +
 drivers/emu/meson.build|6 +
 drivers/meson.build|1 +
 lib/librte_emudev/meson.build  |5 +
 lib/librte_emudev/rte_emudev.c |  486 +++
 lib/librte_emudev/rte_emudev.h |  410 +
 lib/librte_emudev/rte_emudev_vdev.h|   53 ++
 lib/librte_emudev/version.map  |   27 +
 lib/meson.build|2 +-
 23 files changed, 3152 insertions(+), 2 deletions(-)
 create mode 100644 app/test/test_emudev.c
 create mode 100644 doc/guides/prog_guide/emudev.rst
 create mode 100644 drivers/emu/iavf/iavf_emu.c
 create mo

[dpdk-dev] [PATCH 1/8] lib: introduce emudev library

2020-12-18 Thread Chenbo Xia
This patch introduces the emudev library. Emudev library is used
to abstract an emulated device, whose type could be general
(e.g., network, crypto, etc.). Several device-level APIs are
implemented to use or manipulate the device. It can be attached
to another data path driver (e.g., ethdev driver) to plug in its
high performance data path.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
Signed-off-by: Miao Li 
---
 MAINTAINERS |   5 +
 lib/librte_emudev/meson.build   |   5 +
 lib/librte_emudev/rte_emudev.c  | 486 
 lib/librte_emudev/rte_emudev.h  | 410 +++
 lib/librte_emudev/rte_emudev_vdev.h |  53 +++
 lib/librte_emudev/version.map   |  27 ++
 lib/meson.build |   2 +-
 7 files changed, 987 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_emudev/meson.build
 create mode 100644 lib/librte_emudev/rte_emudev.c
 create mode 100644 lib/librte_emudev/rte_emudev.h
 create mode 100644 lib/librte_emudev/rte_emudev_vdev.h
 create mode 100644 lib/librte_emudev/version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 5fb4880758..1b395e181d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1545,6 +1545,11 @@ M: Chenbo Xia 
 M: Xiuchun Lu 
 F: lib/librte_vfio_user/
 
+Emudev - EXPERIMENTAL
+M: Chenbo Xia 
+M: Xiuchun Lu 
+F: lib/librte_emudev/
+
 Test Applications
 -
 
diff --git a/lib/librte_emudev/meson.build b/lib/librte_emudev/meson.build
new file mode 100644
index 00..4e16cecbaf
--- /dev/null
+++ b/lib/librte_emudev/meson.build
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Intel Corporation
+
+sources = files('rte_emudev.c')
+headers = files('rte_emudev.h', 'rte_emudev_vdev.h')
diff --git a/lib/librte_emudev/rte_emudev.c b/lib/librte_emudev/rte_emudev.c
new file mode 100644
index 00..2bbf3970d8
--- /dev/null
+++ b/lib/librte_emudev/rte_emudev.c
@@ -0,0 +1,486 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+
+#include 
+#include 
+
+#include "rte_emudev.h"
+
+#define RTE_MAX_EMU_DEV 1024
+struct rte_emudev rte_emu_devices[RTE_MAX_EMU_DEV];
+
+static struct rte_emudev_global emu_dev_globals = {
+   .nb_devs = 0
+};
+
+static inline uint16_t rte_emu_alloc_dev_id(void)
+{
+   unsigned int i;
+
+   for (i = 0; i < RTE_MAX_EMU_DEV; i++) {
+   if (rte_emu_devices[i].name[0] == '\0')
+   return i;
+   }
+   return RTE_MAX_EMU_DEV;
+}
+
+uint8_t
+rte_emudev_count(void)
+{
+   return emu_dev_globals.nb_devs;
+}
+
+int
+rte_emudev_get_dev_id(const char *name)
+{
+   uint16_t i;
+
+   if (!name) {
+   RTE_EMUDEV_LOG(ERR, "Failed to get device ID: "
+   "NULL device name\n");
+   return -EINVAL;
+   }
+
+   for (i = 0; i < emu_dev_globals.nb_devs; i++)
+   if (!strncmp(rte_emu_devices[i].name, name,
+   RTE_EMU_NAME_MAX_LEN))
+   return i;
+
+   return -ENODEV;
+}
+
+struct rte_emudev *
+rte_emudev_allocate(const char *name)
+{
+   uint16_t dev_id;
+   struct rte_emudev *emu_dev = NULL;
+   size_t name_len;
+
+   if (!name) {
+   RTE_EMUDEV_LOG(ERR, "Failed to allocate emudev: "
+   "NULL device name\n");
+   return NULL;
+   }
+
+   name_len = strnlen(name, RTE_EMU_NAME_MAX_LEN);
+   if (!name_len) {
+   RTE_EMUDEV_LOG(ERR, "Emulated device name has zero length\n");
+   return NULL;
+   }
+
+   if (name_len >= RTE_EMU_NAME_MAX_LEN) {
+   RTE_EMUDEV_LOG(ERR, "Emulated device name too long\n");
+   return NULL;
+   }
+
+   if (rte_emudev_allocated(name) != NULL) {
+   RTE_EMUDEV_LOG(ERR,
+   "Emulated device with name %s already exists\n",
+   name);
+   return NULL;
+   }
+
+   dev_id = rte_emu_alloc_dev_id();
+   if (dev_id == RTE_MAX_EMU_DEV) {
+   RTE_EMUDEV_LOG(ERR, "Reached max number of Emulated device\n");
+   return NULL;
+   }
+
+   emu_dev = &rte_emu_devices[dev_id];
+   strncpy(emu_dev->name, name, sizeof(emu_dev->name));
+   emu_dev->dev_id = dev_id;
+   emu_dev_globals.nb_devs++;
+
+   return emu_dev;
+}
+
+int
+rte_emudev_release(struct rte_emudev *dev)
+{
+   if (!dev)
+   return -EINVAL;
+
+   if (dev->priv_data) {
+   rte_free(dev->priv_data);
+   dev->priv_data = NULL;
+   }
+
+   memset(dev, 0, sizeof(*dev));
+   emu_dev_globals.nb_devs--;
+   return 0;
+}
+
+struct rte_emudev *
+rte_emudev_allocated(const char *

[dpdk-dev] [PATCH 2/8] doc: add emudev library guide

2020-12-18 Thread Chenbo Xia
Add emudev library guide and update release notes.

Signed-off-by: Chenbo Xia 
---
 doc/guides/prog_guide/emudev.rst   | 122 +
 doc/guides/prog_guide/index.rst|   1 +
 doc/guides/rel_notes/release_21_02.rst |  12 +++
 3 files changed, 135 insertions(+)
 create mode 100644 doc/guides/prog_guide/emudev.rst

diff --git a/doc/guides/prog_guide/emudev.rst b/doc/guides/prog_guide/emudev.rst
new file mode 100644
index 00..91ad520de7
--- /dev/null
+++ b/doc/guides/prog_guide/emudev.rst
@@ -0,0 +1,122 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+Copyright(c) 2020 Intel Corporation.
+
+Emulated Device Library
+=
+
+Introduction
+
+
+The DPDK Emudev library is an abstraction for emulated device. This library
+provides a generic set of APIs for device provider, data path provider and
+applications to use.
+
+A device provider could be implemented as a driver on vdev bus. It should
+expose itself as an emudev for applications to use. It is responsible for the
+device resource management and the device's internal logic. All specifics of a
+device, except data path handling, should be implemented in the device
+provider. The device provider uses emudev APIs mainly to create/destroy an
+emudev instance. The device provider should also use a transport to communicate
+with device consumer (e.g., virtual machine monitor or container). A potential
+choice could be vfio-user library, which implements the vfio-user protocol for
+emulating devices outside of a virtual machine monitor.
+
+A data path provider could be implemented as any type of driver on vdev bus.
+If the device you want to emulate is a network device, you could implement
+it as an ethdev driver. It is responsible for all data path handling. The data
+path provider uses emudev APIs mainly for getting device-related information
+from the device provider.
+
+Applications use emudev APIs for device lifecycle management and configuration.
+
+Design
+
+
+Some key objects are designed in emudev.
+
+  ``Regions`` are the device layout exposed to the data path provider.
+
+  ``Queues`` are the data path queues that the data path provider needs. Queue
+  information includes queue base address, queue size, queue-related doorbell
+  and interrupt information.
+
+  ``Memory Table`` is the DMA mapping table. The data path provider could use
+  it to perform DMA read/write on device consumer's memory.
+
+Information of above key objects could be acquired through emudev APIs. The
+following will introduce the emudev APIs which are used by data path provider
+and applications. The APIs for device provider to use are allocate/release APIs
+and will not be listed because it's similar to other device abstraction.
+
+There are five categories of APIs:
+
+1. Lifecycle management
+
+* ``rte_emu_dev_start(dev_id)``
+* ``rte_emu_dev_stop(dev_id)``
+* ``rte_emu_dev_configure(dev_id)``
+* ``rte_emu_dev_close(dev_id)``
+
+  Above APIs are respectively for device start/stop/configure/close and mainly
+  for applications to use.
+
+  ``dev_id`` is the emudev device ID.
+
+2. Notification
+
+* ``rte_emu_subscribe_event(dev_id, ev_chnl)``
+* ``rte_emu_unsubscribe_event(dev_id, ev_chnl)``
+
+  Above APIs are for data path provider and applications to register events.
+  The mechanism of event notification could be different in different device
+  providers. A possible implementation could be event callbacks.
+
+  ``ev_chnl`` is the event channel pointer. The definition varies between
+  different devices.
+
+3. Region-related
+
+* ``rte_emu_region_map(dev_id, index, region_size, base_addr)``
+* ``rte_emu_get_attr(dev_id, attr_name, attr)``
+* ``rte_emu_set_attr(dev_id, attr_name, attr)``
+
+  Above APIs are for data path provider and applications to read/write regions.
+  ``rte_emu_region_map`` is for directly mapping the region and use the mapped
+  address to read/write it. ``rte_emu_get_attr`` and ``rte_emu_set_attr`` are
+  respectively for getting/setting certain attributes in all regions.
+
+  Applications will set attributes or write regions for device configuration.
+
+  In ``rte_emu_region_map``:
+  - ``index`` is the region index.
+  - ``region_size`` is for saving the size of mapped region.
+  - ``base_addr`` is for saving the address of mapped region.
+
+  In ``rte_emu_get_attr`` and ``rte_emu_set_attr``:
+  - ``attr_name`` is the name of attribute. Note that attribute names are aligned
+  between device provider and data path provider for the same device.
+  - ``attr`` is the attribute value.
+
+4. Queue-related
+
+* ``rte_emu_get_queue_info(dev_id, queue, info)``
+* ``rte_emu_get_irq_info(dev_id, irq, info)``
+* ``rte_emu_get_db_info(dev_id, doorbell, info)``
+
+  Above APIs are for data path provider to get queue/interrupt/doorbell information.
+
+  - ``queue``, ``irq`` and ``doorbell`` are respectively the queue/interrupt/doorbell
+  index.
+  - ``info`` is for s

[dpdk-dev] [PATCH 3/8] emu: introduce emulated iavf driver

2020-12-18 Thread Chenbo Xia
This patch introduces emulated iavf driver. It is a vdev driver
emulating all iavf device behavior except data path handling.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 MAINTAINERS  |   7 +
 drivers/emu/iavf/iavf_emu.c  |  29 
 drivers/emu/iavf/iavf_emu_internal.h |  49 +++
 drivers/emu/iavf/iavf_emudev.c   | 207 +++
 drivers/emu/iavf/meson.build |   8 ++
 drivers/emu/iavf/rte_iavf_emu.h  |  43 ++
 drivers/emu/iavf/version.map |   3 +
 drivers/emu/meson.build  |   6 +
 drivers/meson.build  |   1 +
 9 files changed, 353 insertions(+)
 create mode 100644 drivers/emu/iavf/iavf_emu.c
 create mode 100644 drivers/emu/iavf/iavf_emu_internal.h
 create mode 100644 drivers/emu/iavf/iavf_emudev.c
 create mode 100644 drivers/emu/iavf/meson.build
 create mode 100644 drivers/emu/iavf/rte_iavf_emu.h
 create mode 100644 drivers/emu/iavf/version.map
 create mode 100644 drivers/emu/meson.build

diff --git a/MAINTAINERS b/MAINTAINERS
index 1b395e181d..bca206ba8f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1275,6 +1275,13 @@ F: doc/guides/rawdevs/ntb.rst
 F: examples/ntb/
 F: doc/guides/sample_app_ug/ntb.rst
 
+Emudev Drivers
+--
+
+Intel iavf
+M: Chenbo Xia 
+M: Xiuchun Lu 
+F: drivers/emulation/iavf/
 
 Packet processing
 -
diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
new file mode 100644
index 00..68d2c440e3
--- /dev/null
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "iavf_emu_internal.h"
+
+static int iavf_emu_dev_close(struct rte_emudev *dev)
+{
+   struct iavf_emudev *iavf;
+
+   /* For now, we don't support device close when data
+* path driver is attached
+*/
+   if (dev->backend_priv) {
+   EMU_IAVF_LOG(ERR, "Close failed because of "
+   "data path attached\n");
+   return -EPERM;
+   }
+
+   iavf = (struct iavf_emudev *)dev->priv_data;
+   iavf_emu_uninit_device(iavf);
+   dev->priv_data = NULL;
+
+   return 0;
+}
+
+struct rte_emudev_ops emu_iavf_ops = {
+   .dev_close = iavf_emu_dev_close,
+};
diff --git a/drivers/emu/iavf/iavf_emu_internal.h 
b/drivers/emu/iavf/iavf_emu_internal.h
new file mode 100644
index 00..a726bfe577
--- /dev/null
+++ b/drivers/emu/iavf/iavf_emu_internal.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _IAVF_EMU_ITNL_H
+#define _IAVF_EMU_ITNL_H
+
+#include 
+
+#include 
+
+#include "rte_iavf_emu.h"
+
+extern struct rte_emudev_ops emu_iavf_ops;
+
+extern int emu_iavf_logtype;
+#define EMU_IAVF_LOG(level, ...) \
+   rte_log(RTE_LOG_ ## level, emu_iavf_logtype, "EMU_IAVF: " __VA_ARGS__)
+
+struct iavf_emu_intr_info {
+   int enable;
+   int fd;
+};
+
+struct iavf_emu_intr {
+   uint32_t intr_num;
+   struct iavf_emu_intr_info info[RTE_IAVF_EMU_MAX_INTR];
+};
+
+struct iavf_emu_lanQ {
+   uint16_t db_size;
+   void *doorbell;
+};
+
+struct iavf_emudev {
+   struct rte_emudev *edev;
+   /* Maximum LANQ queue pair that this emulated iavf has */
+   uint16_t max_lanqp;
+   /* Maximum LANQ queue pair number that back-end driver can use */
+   uint16_t max_be_lanqp;
+   unsigned int numa_node;
+   char *sock_addr;
+   struct rte_iavf_emu_mem *mem;
+   struct iavf_emu_intr *intr;
+   struct iavf_emu_lanQ *lanq;
+};
+
+void iavf_emu_uninit_device(struct iavf_emudev *dev);
+#endif
diff --git a/drivers/emu/iavf/iavf_emudev.c b/drivers/emu/iavf/iavf_emudev.c
new file mode 100644
index 00..a4cd2deb06
--- /dev/null
+++ b/drivers/emu/iavf/iavf_emudev.c
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include "iavf_emu_internal.h"
+
+#define EMU_IAVF_SOCK_ARG "sock"
+#define EMU_IAVF_QUEUES_ARG "queues"
+
+static const char * const emu_iavf_valid_arg[] = {
+   EMU_IAVF_SOCK_ARG,
+   EMU_IAVF_QUEUES_ARG,
+   NULL
+};
+
+static inline int
+save_sockaddr(const char *key __rte_unused, const char *value,
+   void *extra_args)
+{
+   const char **sock_addr = extra_args;
+
+   if (value == NULL)
+   return -1;
+
+   *sock_addr = value;
+
+   return 0;
+}
+
+static inline int
+save_int(const char *key __rte_unused, const char *value, void *extra_args)
+{
+   uint16_t *n = extra_args;
+
+   if (value == NULL || extra_args == NULL)
+   return -EINVAL;
+
+   *n = (uint16_t)strtoul(value, NULL, 0);
+   if (*n == USHRT_MAX && errno == ERANGE)
+   return -1;
+
+   return 0;
+}
+
+static in

[dpdk-dev] [PATCH 4/8] emu/iavf: add vfio-user device register and unregister

2020-12-18 Thread Chenbo Xia
This patch adds vfio-user APIs call in driver probe and remove.
rte_vfio_user_register() and rte_vfio_user_unregister() are called
to create/destroy a vfio-user device. Notify callbacks that
libvfio_user defines are also implemented.

Signed-off-by: Chenbo Xia 
Signed-off-by: Miao Li 
---
 drivers/emu/iavf/iavf_emu.c  |   3 +-
 drivers/emu/iavf/iavf_emu_internal.h |  19 ++
 drivers/emu/iavf/iavf_emudev.c   |  12 +-
 drivers/emu/iavf/iavf_vfio_user.c| 384 +++
 drivers/emu/iavf/iavf_vfio_user.h|  16 ++
 drivers/emu/iavf/meson.build |   5 +-
 drivers/emu/iavf/rte_iavf_emu.h  |  17 ++
 7 files changed, 452 insertions(+), 4 deletions(-)
 create mode 100644 drivers/emu/iavf/iavf_vfio_user.c
 create mode 100644 drivers/emu/iavf/iavf_vfio_user.h

diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
index 68d2c440e3..dfd9796920 100644
--- a/drivers/emu/iavf/iavf_emu.c
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -2,7 +2,7 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
-#include "iavf_emu_internal.h"
+#include "iavf_vfio_user.h"
 
 static int iavf_emu_dev_close(struct rte_emudev *dev)
 {
@@ -18,6 +18,7 @@ static int iavf_emu_dev_close(struct rte_emudev *dev)
}
 
iavf = (struct iavf_emudev *)dev->priv_data;
+   iavf_emu_unregister_vfio_user(iavf);
iavf_emu_uninit_device(iavf);
dev->priv_data = NULL;
 
diff --git a/drivers/emu/iavf/iavf_emu_internal.h 
b/drivers/emu/iavf/iavf_emu_internal.h
index a726bfe577..10197c00ba 100644
--- a/drivers/emu/iavf/iavf_emu_internal.h
+++ b/drivers/emu/iavf/iavf_emu_internal.h
@@ -17,6 +17,13 @@ extern int emu_iavf_logtype;
 #define EMU_IAVF_LOG(level, ...) \
rte_log(RTE_LOG_ ## level, emu_iavf_logtype, "EMU_IAVF: " __VA_ARGS__)
 
+struct iavf_emu_vfio_user {
+   int dev_id;
+   struct vfio_device_info *dev_info;
+   struct rte_vfio_user_regions *reg;
+   struct rte_vfio_user_irq_info *irq;
+};
+
 struct iavf_emu_intr_info {
int enable;
int fd;
@@ -27,6 +34,14 @@ struct iavf_emu_intr {
struct iavf_emu_intr_info info[RTE_IAVF_EMU_MAX_INTR];
 };
 
+struct iavf_emu_adminQ {
+   uint32_t *ring_addr_lo;
+   uint32_t *ring_addr_hi;
+   uint32_t *ring_sz;
+   uint16_t db_size;
+   void *doorbell;
+};
+
 struct iavf_emu_lanQ {
uint16_t db_size;
void *doorbell;
@@ -34,14 +49,18 @@ struct iavf_emu_lanQ {
 
 struct iavf_emudev {
struct rte_emudev *edev;
+   struct iavf_emu_vfio_user *vfio;
/* Maximum LANQ queue pair that this emulated iavf has */
uint16_t max_lanqp;
/* Maximum LANQ queue pair number that back-end driver can use */
uint16_t max_be_lanqp;
unsigned int numa_node;
+   int ready;
char *sock_addr;
+   struct rte_iavf_emu_notify_ops *ops;
struct rte_iavf_emu_mem *mem;
struct iavf_emu_intr *intr;
+   struct iavf_emu_adminQ adq[RTE_IAVF_EMU_ADMINQ_NUM];
struct iavf_emu_lanQ *lanq;
 };
 
diff --git a/drivers/emu/iavf/iavf_emudev.c b/drivers/emu/iavf/iavf_emudev.c
index a4cd2deb06..fbbe3d95a7 100644
--- a/drivers/emu/iavf/iavf_emudev.c
+++ b/drivers/emu/iavf/iavf_emudev.c
@@ -6,7 +6,7 @@
 #include 
 #include 
 
-#include "iavf_emu_internal.h"
+#include "iavf_vfio_user.h"
 
 #define EMU_IAVF_SOCK_ARG "sock"
 #define EMU_IAVF_QUEUES_ARG "queues"
@@ -170,10 +170,20 @@ rte_emu_iavf_probe(struct rte_vdev_device *dev)
iavf->max_lanqp = queues;
edev->priv_data = (void *)iavf;
 
+   ret = iavf_emu_register_vfio_user(iavf);
+   if (ret) {
+   EMU_IAVF_LOG(ERR,
+   "Emulated iavf failed to register vfio user.\n");
+   ret = -1;
+   goto err_reg;
+   }
+
edev->started = 1;
rte_kvargs_free(kvlist);
return 0;
 
+err_reg:
+   iavf_emu_uninit_device(iavf);
 err_ndev:
rte_emudev_release(edev);
 err:
diff --git a/drivers/emu/iavf/iavf_vfio_user.c 
b/drivers/emu/iavf/iavf_vfio_user.c
new file mode 100644
index 00..aae47de9f3
--- /dev/null
+++ b/drivers/emu/iavf/iavf_vfio_user.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+
+#include 
+
+#include "iavf_vfio_user.h"
+#include 
+
+struct iavf_emu_sock_list {
+   TAILQ_ENTRY(iavf_emu_sock_list) next;
+   struct rte_emudev *emu_dev;
+};
+
+TAILQ_HEAD(iavf_emu_sock_list_head, iavf_emu_sock_list);
+
+static struct iavf_emu_sock_list_head sock_list =
+   TAILQ_HEAD_INITIALIZER(sock_list);
+
+static pthread_mutex_t sock_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static int iavf_emu_setup_irq(struct iavf_emudev *dev)
+{
+   struct iavf_emu_intr *intr;
+   struct rte_vfio_user_irq_info *irq;
+   int *fds = NULL;
+   uint32_t i, count;
+
+

[dpdk-dev] [PATCH 5/8] emu/iavf: add resource management and internal logic of iavf

2020-12-18 Thread Chenbo Xia
This patch adds the allocation and release of device resources.
Device resources include PCI BARs' memory and interrupt related
resources. Device internal logic is also added.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 drivers/emu/iavf/iavf_emu.c   |   1 +
 drivers/emu/iavf/iavf_emudev.c|  20 +
 drivers/emu/iavf/iavf_vfio_user.c | 669 ++
 drivers/emu/iavf/iavf_vfio_user.h |  41 ++
 drivers/emu/iavf/meson.build  |   8 +
 5 files changed, 739 insertions(+)

diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
index dfd9796920..c1a702d744 100644
--- a/drivers/emu/iavf/iavf_emu.c
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -18,6 +18,7 @@ static int iavf_emu_dev_close(struct rte_emudev *dev)
}
 
iavf = (struct iavf_emudev *)dev->priv_data;
+   iavf_emu_uninit_vfio_user(iavf);
iavf_emu_unregister_vfio_user(iavf);
iavf_emu_uninit_device(iavf);
dev->priv_data = NULL;
diff --git a/drivers/emu/iavf/iavf_emudev.c b/drivers/emu/iavf/iavf_emudev.c
index fbbe3d95a7..70cf558eef 100644
--- a/drivers/emu/iavf/iavf_emudev.c
+++ b/drivers/emu/iavf/iavf_emudev.c
@@ -178,10 +178,30 @@ rte_emu_iavf_probe(struct rte_vdev_device *dev)
goto err_reg;
}
 
+   ret = iavf_emu_init_vfio_user(iavf);
+   if (ret) {
+   EMU_IAVF_LOG(ERR,
+   "Emulated iavf failed to init vfio user.\n");
+   ret = -1;
+   goto err_init;
+   }
+
+   ret = iavf_emu_start_vfio_user(iavf);
+   if (ret) {
+   EMU_IAVF_LOG(ERR,
+   "Emulated iavf failed to start vfio user.\n");
+   ret = -1;
+   goto err_start;
+   }
+
edev->started = 1;
rte_kvargs_free(kvlist);
return 0;
 
+err_start:
+   iavf_emu_uninit_vfio_user(iavf);
+err_init:
+   iavf_emu_unregister_vfio_user(iavf);
 err_reg:
iavf_emu_uninit_device(iavf);
 err_ndev:
diff --git a/drivers/emu/iavf/iavf_vfio_user.c 
b/drivers/emu/iavf/iavf_vfio_user.c
index aae47de9f3..a4208de618 100644
--- a/drivers/emu/iavf/iavf_vfio_user.c
+++ b/drivers/emu/iavf/iavf_vfio_user.c
@@ -2,13 +2,36 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 #include 
+#include 
 
 #include 
+#include 
+#include 
 
 #include "iavf_vfio_user.h"
 #include 
 
+#define STORE_LE16(addr, val)   (*(__u16 *)addr = val)
+#define STORE_LE32(addr, val)   (*(__u32 *)addr = val)
+
+#define IAVF_EMU_BAR0_SIZE 0x1
+#define IAVF_EMU_BAR3_SIZE 0x1000
+#define IAVF_EMU_BAR_SIZE_MASK 0x
+#define IAVF_EMU_BAR_MASK(sz) (~(sz) + 1)
+#define IAVF_EMU_MSIX_TABLE_SIZE 0x5
+
+#define PCI_VENDOR_ID_INTEL 0x8086
+#define PCI_SUBDEVICE_ID 0x1100
+#define PCI_CLASS_ETHERNET 0x0200
+
 struct iavf_emu_sock_list {
TAILQ_ENTRY(iavf_emu_sock_list) next;
struct rte_emudev *emu_dev;
@@ -174,6 +197,14 @@ static int iavf_emu_setup_queues(struct iavf_emudev *dev)
return 0;
 }
 
+static inline void iavf_emu_cleanup_queues(struct iavf_emudev *dev)
+{
+   memset(&dev->adq, 0, RTE_IAVF_EMU_ADMINQ_NUM *
+   sizeof(struct iavf_emu_adminQ));
+
+   rte_free(dev->lanq);
+}
+
 static inline void iavf_emu_reset_queues(struct iavf_emudev *dev)
 {
memset(&dev->adq, 0, RTE_IAVF_EMU_ADMINQ_NUM *
@@ -191,6 +222,576 @@ static void iavf_emu_reset_all_resources(struct 
iavf_emudev *dev)
iavf_emu_reset_regions(dev);
 }
 
+static int iavf_emu_init_dev(struct iavf_emudev *dev)
+{
+   struct iavf_emu_vfio_user *vfio;
+   struct vfio_device_info *dev_info;
+   struct rte_vfio_user_regions *reg;
+   struct rte_vfio_user_irq_info *irq;
+   struct vfio_region_info_cap_sparse_mmap *sparse;
+   int ret;
+   uint32_t i, j;
+
+   vfio = rte_zmalloc_socket("vfio", sizeof(*vfio), 0, dev->numa_node);
+   if (!vfio) {
+   EMU_IAVF_LOG(ERR, "Failed to alloc iavf_emu_vfio_user\n");
+   ret = -1;
+   goto exit;
+   }
+
+   dev_info = rte_zmalloc_socket("vfio_dev_info",
+   sizeof(*dev_info), 0, dev->numa_node);
+   if (!dev_info) {
+   EMU_IAVF_LOG(ERR, "Failed to alloc vfio dev_info\n");
+   ret = -1;
+   goto err_info;
+   }
+   dev_info->argsz = sizeof(*dev_info);
+   dev_info->flags = VFIO_DEVICE_FLAGS_PCI | VFIO_DEVICE_FLAGS_RESET;
+   dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+   dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+
+   reg = rte_zmalloc_socket("vfio_user_regions",
+   sizeof(*reg) + dev_info->num_regions *
+   sizeof(struct rte_vfio_user_reg_info), 0, dev->numa_node);
+   if (!reg) {
+   EMU_IAVF_LOG(ERR, &q

[dpdk-dev] [PATCH 6/8] emu/iavf: add emudev operations to fit in emudev framework

2020-12-18 Thread Chenbo Xia
This patch implements emudev operations to make emulated iavf
fit into rte_emudev framework. Lifecycle related and device
resource related operations are both implemented.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 drivers/emu/iavf/iavf_emu.c | 218 
 drivers/emu/iavf/rte_iavf_emu.h |  59 +
 2 files changed, 277 insertions(+)

diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
index c1a702d744..9ad371ca98 100644
--- a/drivers/emu/iavf/iavf_emu.c
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -2,7 +2,72 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
 #include "iavf_vfio_user.h"
+#include 
+
+static int iavf_emu_dev_start(struct rte_emudev *dev)
+{
+   struct iavf_emudev *iavf = (struct iavf_emudev *)dev->priv_data;
+
+   if (iavf->ops != NULL && iavf->ops->device_start != NULL)
+   iavf->ops->device_start(dev);
+
+   return 0;
+}
+
+static void iavf_emu_dev_stop(struct rte_emudev *dev)
+{
+   struct iavf_emudev *iavf = (struct iavf_emudev *)dev->priv_data;
+
+   if (iavf->ops != NULL && iavf->ops->device_stop != NULL)
+   iavf->ops->device_stop(dev);
+}
+
+static int iavf_emu_dev_configure(struct rte_emudev *dev,
+   struct rte_emudev_info *dev_conf)
+{
+   struct iavf_emudev *iavf = (struct iavf_emudev *)dev->priv_data;
+   struct rte_iavf_emu_config *conf =
+   (struct rte_iavf_emu_config *)dev_conf->dev_priv;
+
+   if (!dev_conf->dev_priv)
+   return -EINVAL;
+
+   /* Currently emulated iavf does not support max_qp_num
+* and region num configuration
+*/
+   if (dev->dev_info.max_qp_num != dev_conf->max_qp_num ||
+   dev->dev_info.region_num != dev_conf->region_num) {
+   EMU_IAVF_LOG(ERR,
+   "Configure max_qp_num/region num not supported\n");
+   return -ENOTSUP;
+   }
+
+   if (conf->qp_num >  RTE_MAX_QUEUES_PER_PORT ||
+   conf->qp_num > RTE_IAVF_EMU_MAX_QP_NUM) {
+   EMU_IAVF_LOG(ERR, "Queue pair num exceeds max\n");
+   return -EINVAL;
+   }
+
+   /* For now, we don't support device configure when data
+* path driver is attached
+*/
+   if (dev->backend_priv) {
+   EMU_IAVF_LOG(ERR, "Configure failed because of "
+   "data path attached\n");
+   return -EPERM;
+   }
+
+   iavf->max_be_lanqp = conf->qp_num;
+   return 0;
+}
 
 static int iavf_emu_dev_close(struct rte_emudev *dev)
 {
@@ -26,6 +91,159 @@ static int iavf_emu_dev_close(struct rte_emudev *dev)
return 0;
 }
 
+static int iavf_emu_get_dev_info(struct rte_emudev *dev,
+   rte_emudev_obj_t info)
+{
+   struct iavf_emudev *iavf = (struct iavf_emudev *)dev->priv_data;
+   struct rte_iavf_emu_config *conf = (struct rte_iavf_emu_config *)info;
+
+   if (!info)
+   return -EINVAL;
+
+   conf->qp_num = iavf->max_be_lanqp;
+   return 0;
+}
+
+static int iavf_emu_get_mem_table(struct rte_emudev *dev,
+   rte_emudev_mem_table_t *tb)
+{
+   struct iavf_emudev *iavf = (struct iavf_emudev *)dev->priv_data;
+
+   *tb = iavf->mem;
+
+   return 0;
+}
+
+static int iavf_emu_get_queue_info(struct rte_emudev *dev, uint32_t queue,
+   struct rte_emudev_q_info *info)
+{
+   struct iavf_emudev *iavf = (struct iavf_emudev *)dev->priv_data;
+
+   if (queue < RTE_IAVF_EMU_ADMINQ_NUM) {
+   struct iavf_emu_adminQ *adq = &iavf->adq[queue];
+   uint64_t base, size;
+
+   if (adq->ring_addr_lo == NULL ||
+   adq->ring_addr_hi == NULL ||
+   adq->ring_sz == NULL)
+   return -1;
+   base = RTE_IAVF_EMU_32_TO_64(*adq->ring_addr_hi,
+   *adq->ring_addr_lo);
+   size = *adq->ring_sz;
+   info->base = base;
+   info->size = size;
+   info->doorbell_id = queue;
+   /* RX AdminQ should have IRQ vector 0 */
+   info->irq_vector = queue - 1;
+   } else {
+   info->base = 0;
+   info->size = 0;
+   info->doorbell_id = queue;
+   info->irq_vector = -1;
+   }
+
+   return 0;
+}
+
+static int iavf_emu_get_irq_info(struct rte_emudev *dev, uint32_t vector,
+   struct rte_emudev_irq_info *info)
+{
+   struct iavf_emudev *iavf = (struct iavf_emudev *)dev->priv_data;
+   struct iavf_emu_intr *intr = iavf->intr;
+   struct iavf_emu_intr_info *intr_info = &intr->info[vector];
+
+   i

[dpdk-dev] [PATCH 7/8] test/emudev: introduce functional test

2020-12-18 Thread Chenbo Xia
This patch introduces functional test for emudev. The
implementation of iavf emudev selftest is also added.

Signed-off-by: Miao Li 
Signed-off-by: Chenbo Xia 
---
 app/test/meson.build |   5 +-
 app/test/test_emudev.c   |  29 +
 drivers/emu/iavf/iavf_emu.c  |   1 +
 drivers/emu/iavf/iavf_emu_internal.h |   1 +
 drivers/emu/iavf/iavf_emu_test.c | 174 +++
 drivers/emu/iavf/meson.build |   2 +-
 6 files changed, 210 insertions(+), 2 deletions(-)
 create mode 100644 app/test/test_emudev.c
 create mode 100644 drivers/emu/iavf/iavf_emu_test.c

diff --git a/app/test/meson.build b/app/test/meson.build
index f5b15ac44c..b8b79bbc8b 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -139,6 +139,7 @@ test_sources = files('commands.c',
'test_trace_register.c',
'test_trace_perf.c',
'test_vfio_user.c',
+   'test_emudev.c',
'test_version.c',
'virtual_pmd.c'
 )
@@ -176,7 +177,8 @@ test_deps = ['acl',
'stack',
'vfio_user',
'telemetry',
-   'timer'
+   'timer',
+   'emudev'
 ]
 
 # Each test is marked with flag true/false
@@ -327,6 +329,7 @@ driver_test_names = [
 'eventdev_selftest_octeontx',
 'eventdev_selftest_sw',
 'rawdev_autotest',
+'emudev_autotest',
 ]
 
 dump_test_names = [
diff --git a/app/test/test_emudev.c b/app/test/test_emudev.c
new file mode 100644
index 00..0dfce162ed
--- /dev/null
+++ b/app/test/test_emudev.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+
+#include "test.h"
+
+static int
+test_emudev_selftest_impl(const char *pmd, const char *opts)
+{
+   int ret = 0;
+
+   if (rte_emudev_get_dev_id(pmd) == -ENODEV)
+   ret = rte_vdev_init(pmd, opts);
+   if (ret)
+   return TEST_SKIPPED;
+
+   return rte_emudev_selftest(rte_emudev_get_dev_id(pmd));
+}
+
+static int
+test_emudev_selftest(void)
+{
+   return test_emudev_selftest_impl("emu_iavf", "sock=/tmp/sock1");
+}
+
+REGISTER_TEST_COMMAND(emudev_autotest, test_emudev_selftest);
diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
index 9ad371ca98..88bf2bdf94 100644
--- a/drivers/emu/iavf/iavf_emu.c
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -246,4 +246,5 @@ struct rte_emudev_ops emu_iavf_ops = {
.subscribe_event = iavf_emu_subs_ev,
.unsubscribe_event = iavf_emu_unsubs_ev,
.get_attr = iavf_emu_get_attr,
+   .dev_selftest = iavf_emu_selftest,
 };
diff --git a/drivers/emu/iavf/iavf_emu_internal.h 
b/drivers/emu/iavf/iavf_emu_internal.h
index 10197c00ba..1ac7f96566 100644
--- a/drivers/emu/iavf/iavf_emu_internal.h
+++ b/drivers/emu/iavf/iavf_emu_internal.h
@@ -65,4 +65,5 @@ struct iavf_emudev {
 };
 
 void iavf_emu_uninit_device(struct iavf_emudev *dev);
+int iavf_emu_selftest(uint16_t dev_id);
 #endif
diff --git a/drivers/emu/iavf/iavf_emu_test.c b/drivers/emu/iavf/iavf_emu_test.c
new file mode 100644
index 00..ad19134724
--- /dev/null
+++ b/drivers/emu/iavf/iavf_emu_test.c
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "iavf_emu_internal.h"
+
+#define TEST_DEV_NAME "emu_iavf"
+#define TEST_SUCCESS 0
+#define TEST_FAILED -1
+
+#define EMUDEV_TEST_RUN(setup, teardown, test) \
+   emudev_test_run(setup, teardown, test, #test)
+static uint16_t test_dev_id;
+static int total;
+static int passed;
+static int failed;
+static int unsupported;
+
+static int
+testsuite_setup(void)
+{
+   uint8_t count;
+   count = rte_emudev_count();
+   if (!count) {
+   EMU_IAVF_LOG(INFO, "No existing emu dev; "
+ "Creating emu_iavf\n");
+   return rte_vdev_init(TEST_DEV_NAME, NULL);
+   }
+
+   return TEST_SUCCESS;
+}
+
+static void
+testsuite_teardown(void)
+{
+   rte_vdev_uninit(TEST_DEV_NAME);
+}
+
+static void emudev_test_run(int (*setup)(void),
+void (*teardown)(void),
+int (*test)(void),
+const char *name)
+{
+   int ret = 0;
+
+   if (setup) {
+   ret = setup();
+   if (ret < 0) {
+   EMU_IAVF_LOG(INFO, "Error setting up test %s\n", name);
+   unsupported++;
+   }
+   }
+
+   if (test) {
+   ret = test();
+   if (ret < 0) {
+   failed++;
+   EMU_IAVF_LOG(I

[dpdk-dev] [PATCH 8/8] doc: update release notes for iavf emudev driver

2020-12-18 Thread Chenbo Xia
Update release notes for emulated iavf driver.

Signed-off-by: Chenbo Xia 
---
 doc/guides/rel_notes/release_21_02.rst | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_21_02.rst 
b/doc/guides/rel_notes/release_21_02.rst
index 3d26b6b580..b310b67b7d 100644
--- a/doc/guides/rel_notes/release_21_02.rst
+++ b/doc/guides/rel_notes/release_21_02.rst
@@ -67,7 +67,7 @@ New Features
 
   See :doc:`../prog_guide/vfio_user_lib` for more information.
 
-* **Added emudev Library.**
+* **Added emudev Library and first emudev driver.**
 
   Added an experimental library ``librte_emudev`` to provide device abstraction
   for an emulated device.
@@ -77,6 +77,10 @@ New Features
   crypto and etc.). It can be attached to another data path driver (e.g, ethdev
   driver) to leverage the high performance of DPDK data path driver.
 
+  The first emudev driver is also introduced, which emulates software iavf
+  devices. This driver emulates all iavf device behavior except data path
+  handling.
+
   See :doc:`../prog_guide/emudev` for more information.
 
 Removed Items
-- 
2.17.1



[dpdk-dev] [PATCH v2 0/8] Introduce emudev library and iavf emudev driver

2020-12-18 Thread Chenbo Xia
This series introduces a new device abstraction called emudev for emulated
devices. A new library (librte_emudev) is implemented. The first emudev
driver is also introduced, which emulates Intel Adaptive Virtual Function
(iavf) as a software network device.

This series has a dependency on librte_vfio_user patch series:
http://patchwork.dpdk.org/cover/85389/

Background & Motivation 
---
The disaggregated/multi-process QEMU is using VFIO-over-socket/vfio-user
as the main transport mechanism to disaggregate IO services from QEMU.
Therefore, librte_vfio_user is introduced in DPDK to accommodate
emulated devices for high performance I/O. Although vfio-user library
provides possibility of emulating devices in DPDK, DPDK does not have
a device abstraction for emulated devices. A good device abstraction will
be useful for applications or high performance data path driver. With
this consideration, emudev library is designed and implemented. It also
makes it possible to keep a modular design for emulated devices by implementing
data path related logic in a standalone driver (e.g., an ethdev driver)
while keeping the unrelated logic in the emudev driver.

Design overview
---

+---+
|   +---++---+  |
|   |  iavf_emudev  |<-->| data path |  |
|   |driver ||   driver  |  |
|   +---++---+  |
|   |   |
| --- VDEV BUS  |
|   |   |
|   +---+   |
+--+|   | vdev: |   |
| +--+ ||   | /path/to/vfio |   |
| | Generic  | ||   +---+   |
| | vfio-dev | ||   |   |
| +--+ ||   |   |
| +--+ ||  +--+ |
| | vfio-user| ||  | vfio-user| |
| | client   | |<---|->| server   | |
| +--+ ||  +--+ |
| QEMU/DPDK|| DPDK  |
+--++---+

- Generic vfio-dev/vfio-user client/vfio-user server
  Above concepts are all introduced in librte_vfio_user patch series:
  http://patchwork.dpdk.org/cover/85389/

- vdev:/path/to/vfio.
  It binds to vdev bus driver. The vdev device is defined by DPDK applications
  through command line as '--vdev=emu_iavf, path=/path/to/socket' in iavf_emudev
  case. Parameters in command line include device name (emu_iavf) which is used
  to identify corresponding driver (in this case, iavf_emudev driver),
  path=/path/to/socket which is used to open the transport interface to 
vfio-user
  client in QEMU/DPDK.

- data path driver.
  The data path handling is split into another standalone driver for modular
  design.

Why not rawdev for emulated device
--
Instead of introducing new class emudev, emulated device could be presented as 
rawdev.
However, existing rawdev APIs cannot meet the requirements of emulated device. 
There are
three API categories for emudev. They are emudev device lifecycle management, 
backend
facing APIs, and emudev device provider facing APIs respectively. Existing 
rawdev APIs
could only cover lifecycle management APIs and some of backend facing APIs. 
Other APIs,
even if added to rawdev API are not required by other rawdev applications.

--
v2:
 - fix driver meson build file


Chenbo Xia (8):
  lib: introduce emudev library
  doc: add emudev library guide
  emu: introduce emulated iavf driver
  emu/iavf: add vfio-user device register and unregister
  emu/iavf: add resource management and internal logic of iavf
  emu/iavf: add emudev operations to fit in emudev framework
  test/emudev: introduce functional test
  doc: update release notes for iavf emudev driver

 MAINTAINERS|   12 +
 app/test/meson.build   |5 +-
 app/test/test_emudev.c |   29 +
 doc/guides/prog_guide/emudev.rst   |  122 +++
 doc/guides/prog_guide/index.rst|1 +
 doc/guides/rel_notes/release_21_02.rst |   16 +
 drivers/emu/iavf/iavf_emu.c|  250 ++
 drivers/emu/iavf/iavf_emu_internal.h   |   69 ++
 drivers/emu/iavf/iavf_emu_test.c   |  174 
 drivers/emu/iavf/iavf_emudev.c |  237 ++
 drivers/emu/iavf/iavf_vfio_user.c  | 1053 
 drivers/emu/iavf/iavf_vfio_user.h  |   57 ++
 drivers/emu/iavf/meson.build   |   17 +
 drivers/emu/iavf/rte_iavf_emu.h|  119 +++
 drivers/emu/iavf/ve

[dpdk-dev] [PATCH v2 1/8] lib: introduce emudev library

2020-12-18 Thread Chenbo Xia
This patch introduces the emudev library. Emudev library is used
to abstract an emulated device, whose type could be general
(e.g., network, crypto, etc.). Several device-level APIs are
implemented to use or manipulate the device. It can be attached
to another data path driver (e.g., ethdev driver) to plug in its
high performance data path.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
Signed-off-by: Miao Li 
---
 MAINTAINERS |   5 +
 lib/librte_emudev/meson.build   |   5 +
 lib/librte_emudev/rte_emudev.c  | 486 
 lib/librte_emudev/rte_emudev.h  | 410 +++
 lib/librte_emudev/rte_emudev_vdev.h |  53 +++
 lib/librte_emudev/version.map   |  27 ++
 lib/meson.build |   2 +-
 7 files changed, 987 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_emudev/meson.build
 create mode 100644 lib/librte_emudev/rte_emudev.c
 create mode 100644 lib/librte_emudev/rte_emudev.h
 create mode 100644 lib/librte_emudev/rte_emudev_vdev.h
 create mode 100644 lib/librte_emudev/version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 5fb4880758..1b395e181d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1545,6 +1545,11 @@ M: Chenbo Xia 
 M: Xiuchun Lu 
 F: lib/librte_vfio_user/
 
+Emudev - EXPERIMENTAL
+M: Chenbo Xia 
+M: Xiuchun Lu 
+F: lib/librte_emudev/
+
 Test Applications
 -
 
diff --git a/lib/librte_emudev/meson.build b/lib/librte_emudev/meson.build
new file mode 100644
index 00..4e16cecbaf
--- /dev/null
+++ b/lib/librte_emudev/meson.build
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Intel Corporation
+
+sources = files('rte_emudev.c')
+headers = files('rte_emudev.h', 'rte_emudev_vdev.h')
diff --git a/lib/librte_emudev/rte_emudev.c b/lib/librte_emudev/rte_emudev.c
new file mode 100644
index 00..2bbf3970d8
--- /dev/null
+++ b/lib/librte_emudev/rte_emudev.c
@@ -0,0 +1,486 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+
+#include 
+#include 
+
+#include "rte_emudev.h"
+
+#define RTE_MAX_EMU_DEV 1024
+struct rte_emudev rte_emu_devices[RTE_MAX_EMU_DEV];
+
+static struct rte_emudev_global emu_dev_globals = {
+   .nb_devs = 0
+};
+
+/* Return the index of the first rte_emu_devices[] slot whose name is empty,
+ * or RTE_MAX_EMU_DEV when every slot is occupied.
+ */
+static inline uint16_t rte_emu_alloc_dev_id(void)
+{
+   unsigned int slot = 0;
+
+   while (slot < RTE_MAX_EMU_DEV &&
+           rte_emu_devices[slot].name[0] != '\0')
+       slot++;
+
+   return slot;
+}
+
+/* Return the number of emulated devices recorded in
+ * emu_dev_globals.nb_devs.
+ * NOTE(review): the return type is uint8_t while RTE_MAX_EMU_DEV is 1024,
+ * so a count above 255 would not fit -- confirm against the type of nb_devs.
+ */
+uint8_t
+rte_emudev_count(void)
+{
+   return emu_dev_globals.nb_devs;
+}
+
+/* Look up an emulated device by name.
+ *
+ * Returns the device ID (its index in rte_emu_devices[]) on success,
+ * -EINVAL when name is NULL and -ENODEV when no device has that name.
+ *
+ * The whole table is scanned rather than only the first nb_devs entries:
+ * rte_emudev_release() can clear any slot and rte_emu_alloc_dev_id() reuses
+ * the first free one, so live devices are not guaranteed to be packed at
+ * the start of the table.
+ */
+int
+rte_emudev_get_dev_id(const char *name)
+{
+   uint16_t i;
+
+   if (!name) {
+       RTE_EMUDEV_LOG(ERR, "Failed to get device ID: "
+           "NULL device name\n");
+       return -EINVAL;
+   }
+
+   /* An empty name marks a free slot, so it can never identify a device. */
+   if (name[0] == '\0')
+       return -ENODEV;
+
+   for (i = 0; i < RTE_MAX_EMU_DEV; i++)
+       if (!strncmp(rte_emu_devices[i].name, name,
+               RTE_EMU_NAME_MAX_LEN))
+           return i;
+
+   return -ENODEV;
+}
+
+/* Reserve a slot in rte_emu_devices[] for a new device called name.
+ *
+ * Fails (returns NULL, after logging) when name is NULL, empty, not shorter
+ * than RTE_EMU_NAME_MAX_LEN, already in use, or when no slot is free.
+ * On success the slot's name and dev_id are filled in, the global device
+ * count is incremented and the slot is returned.
+ */
+struct rte_emudev *
+rte_emudev_allocate(const char *name)
+{
+   struct rte_emudev *slot;
+   uint16_t id;
+   size_t len;
+
+   if (name == NULL) {
+       RTE_EMUDEV_LOG(ERR, "Failed to allocate emudev: "
+           "NULL device name\n");
+       return NULL;
+   }
+
+   len = strnlen(name, RTE_EMU_NAME_MAX_LEN);
+   if (len == 0) {
+       RTE_EMUDEV_LOG(ERR, "Emulated device name has zero length\n");
+       return NULL;
+   }
+   if (len >= RTE_EMU_NAME_MAX_LEN) {
+       RTE_EMUDEV_LOG(ERR, "Emulated device name too long\n");
+       return NULL;
+   }
+
+   if (rte_emudev_allocated(name)) {
+       RTE_EMUDEV_LOG(ERR,
+           "Emulated device with name %s already exists\n",
+           name);
+       return NULL;
+   }
+
+   id = rte_emu_alloc_dev_id();
+   if (id == RTE_MAX_EMU_DEV) {
+       RTE_EMUDEV_LOG(ERR, "Reached max number of Emulated device\n");
+       return NULL;
+   }
+
+   slot = rte_emu_devices + id;
+   strncpy(slot->name, name, sizeof(slot->name));
+   slot->dev_id = id;
+   emu_dev_globals.nb_devs++;
+
+   return slot;
+}
+
+/* Release an emulated device slot: free its private data, zero the whole
+ * slot and decrement the global device count.
+ * Returns 0 on success or -EINVAL when dev is NULL.
+ */
+int
+rte_emudev_release(struct rte_emudev *dev)
+{
+   if (dev == NULL)
+       return -EINVAL;
+
+   /* rte_free() accepts NULL, and the memset() below clears priv_data
+    * together with the rest of the slot.
+    */
+   rte_free(dev->priv_data);
+   memset(dev, 0, sizeof(*dev));
+   emu_dev_globals.nb_devs--;
+
+   return 0;
+}
+
+struct rte_emudev *
+rte_emudev_allocated(const char *

[dpdk-dev] [PATCH v2 2/8] doc: add emudev library guide

2020-12-18 Thread Chenbo Xia
Add emudev library guide and update release notes.

Signed-off-by: Chenbo Xia 
---
 doc/guides/prog_guide/emudev.rst   | 122 +
 doc/guides/prog_guide/index.rst|   1 +
 doc/guides/rel_notes/release_21_02.rst |  12 +++
 3 files changed, 135 insertions(+)
 create mode 100644 doc/guides/prog_guide/emudev.rst

diff --git a/doc/guides/prog_guide/emudev.rst b/doc/guides/prog_guide/emudev.rst
new file mode 100644
index 00..91ad520de7
--- /dev/null
+++ b/doc/guides/prog_guide/emudev.rst
@@ -0,0 +1,122 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+Copyright(c) 2020 Intel Corporation.
+
+Emulated Device Library
+=
+
+Introduction
+
+
+The DPDK Emudev library is an abstraction for emulated device. This library
+provides a generic set of APIs for device provider, data path provider and
+applications to use.
+
+A device provider could be implemented as a driver on vdev bus. It should
+expose itself as an emudev for applications to use. It is responsible for the
+device resource management and the device's internal logic. All specifics of a
+device, except data path handling, should be implemented in the device
+provider. The device provider uses emudev APIs mainly for create/destroy an
+emudev instance. The device provider should also use a transport to communicate
+with device consumer (e.g., virtual machine monitor or container). A potential
+choice could be vfio-user library, which implements the vfio-user protocol for
+emulating devices outside of a virtual machine monitor.
+
+A data path provider could be implemented as any type of driver on vdev bus.
+If the device you want to emulate is a network device, you could implement
+it as an ethdev driver. It is responsible for all data path handling. The data
+path provider uses emudev APIs mainly for getting device-related information
+from the device provider.
+
+Applications use emudev APIs for device lifecycle management and configuration.
+
+Design
+
+
+Some key objects are designed in emudev.
+
+  ``Regions`` are the device layout exposed to the data path provider.
+
+  ``Queues`` are the data path queues that the data path provider needs. Queue
+  information includes queue base address, queue size, queue-related doorbell
+  and interrupt information.
+
+  ``Memory Table`` is the DMA mapping table. The data path provider could use
+  it to perform DMA read/write on device consumer's memory.
+
+Information of above key objects could be acquired through emudev APIs. The
+following will introduce the emudev APIs which are used by data path provider
+and applications. The APIs for device provider to use are allocate/release APIs
+and will not be listed because it's similar to other device abstraction.
+
+There are five categories of APIs:
+
+1. Lifecycle management
+
+* ``rte_emu_dev_start(dev_id)``
+* ``rte_emu_dev_stop(dev_id)``
+* ``rte_emu_dev_configure(dev_id)``
+* ``rte_emu_dev_close(dev_id)``
+
+  Above APIs are respectively for device start/stop/configure/close and mainly
+  for applications to use.
+
+  ``dev_id`` is the emudev device ID.
+
+2. Notification
+
+* ``rte_emu_subscribe_event(dev_id, ev_chnl)``
+* ``rte_emu_unsubscribe_event(dev_id, ev_chnl)``
+
+  Above APIs are for data path provider and applications to register events.
+  The mechanism of event notification could be different in different device
+  providers. A possible implementation could be event callbacks.
+
+  ``ev_chnl`` is the event channel pointer. The definition varies between
+  different devices.
+
+3. Region-related
+
+* ``rte_emu_region_map(dev_id, index, region_size, base_addr)``
+* ``rte_emu_get_attr(dev_id, attr_name, attr)``
+* ``rte_emu_set_attr(dev_id, attr_name, attr)``
+
+  Above APIs are for data path provider and applications to read/write regions.
+  ``rte_emu_region_map`` is for directly mapping the region and use the mapped
+  address to read/write it. ``rte_emu_get_attr`` and ``rte_emu_set_attr`` are
+  respectively for getting/setting certain attributes in all regions.
+
+  Applications will set attributes or write regions for device configuration.
+
+  In ``rte_emu_region_map``:
+  - ``index`` is the region index.
+  - ``region_size`` is for saving the size of mapped region.
+  - ``base_addr`` is for saving the address of mapped region.
+
+  In ``rte_emu_get_attr`` and ``rte_emu_set_attr``:
+  - ``attr_name`` is the name of attribute. Note that attribute names are 
aligned
+  between device provider and data path provider for the same device.
+  - ``attr`` is the attribute value.
+
+4. Queue-related
+
+* ``rte_emu_get_queue_info(dev_id, queue, info)``
+* ``rte_emu_get_irq_info(dev_id, irq, info)``
+* ``rte_emu_get_db_info(dev_id, doorbell, info)``
+
+  Above APIs are for data path provider to get queue/interrupt/doorbell 
information.
+
+  - ``queue``, ``irq`` and ``doorbell`` are respectively the 
queue/interrupt/doorbell
+  index.
+  - ``info`` is for s

[dpdk-dev] [PATCH v2 3/8] emu: introduce emulated iavf driver

2020-12-18 Thread Chenbo Xia
This patch introduces emulated iavf driver. It is a vdev driver
emulating all iavf device behavior except data path handling.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 MAINTAINERS  |   7 +
 drivers/emu/iavf/iavf_emu.c  |  29 
 drivers/emu/iavf/iavf_emu_internal.h |  49 +++
 drivers/emu/iavf/iavf_emudev.c   | 207 +++
 drivers/emu/iavf/meson.build |   8 ++
 drivers/emu/iavf/rte_iavf_emu.h  |  43 ++
 drivers/emu/iavf/version.map |   3 +
 drivers/emu/meson.build  |   6 +
 drivers/meson.build  |   1 +
 9 files changed, 353 insertions(+)
 create mode 100644 drivers/emu/iavf/iavf_emu.c
 create mode 100644 drivers/emu/iavf/iavf_emu_internal.h
 create mode 100644 drivers/emu/iavf/iavf_emudev.c
 create mode 100644 drivers/emu/iavf/meson.build
 create mode 100644 drivers/emu/iavf/rte_iavf_emu.h
 create mode 100644 drivers/emu/iavf/version.map
 create mode 100644 drivers/emu/meson.build

diff --git a/MAINTAINERS b/MAINTAINERS
index 1b395e181d..bca206ba8f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1275,6 +1275,13 @@ F: doc/guides/rawdevs/ntb.rst
 F: examples/ntb/
 F: doc/guides/sample_app_ug/ntb.rst
 
+Emudev Drivers
+--
+
+Intel iavf
+M: Chenbo Xia 
+M: Xiuchun Lu 
+F: drivers/emu/iavf/
 
 Packet processing
 -
diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
new file mode 100644
index 00..68d2c440e3
--- /dev/null
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "iavf_emu_internal.h"
+
+/* Close an emulated iavf device.
+ * Returns -EPERM (after logging) while dev->backend_priv is set. Otherwise
+ * passes the driver-private data to iavf_emu_uninit_device(), clears
+ * dev->priv_data and returns 0.
+ */
+static int iavf_emu_dev_close(struct rte_emudev *dev)
+{
+   struct iavf_emudev *iavf;
+
+   /* For now, we don't support device close when data
+* path driver is attached
+*/
+   if (dev->backend_priv) {
+   EMU_IAVF_LOG(ERR, "Close failed because of "
+   "data path attached\n");
+   return -EPERM;
+   }
+
+   iavf = (struct iavf_emudev *)dev->priv_data;
+   iavf_emu_uninit_device(iavf);
+   dev->priv_data = NULL;
+
+   return 0;
+}
+
+/* emudev operation table of the emulated iavf driver; at this point only
+ * the close operation is provided.
+ */
+struct rte_emudev_ops emu_iavf_ops = {
+   .dev_close = iavf_emu_dev_close,
+};
diff --git a/drivers/emu/iavf/iavf_emu_internal.h 
b/drivers/emu/iavf/iavf_emu_internal.h
new file mode 100644
index 00..a726bfe577
--- /dev/null
+++ b/drivers/emu/iavf/iavf_emu_internal.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _IAVF_EMU_ITNL_H
+#define _IAVF_EMU_ITNL_H
+
+#include 
+
+#include 
+
+#include "rte_iavf_emu.h"
+
+extern struct rte_emudev_ops emu_iavf_ops;
+
+extern int emu_iavf_logtype;
+#define EMU_IAVF_LOG(level, ...) \
+   rte_log(RTE_LOG_ ## level, emu_iavf_logtype, "EMU_IAVF: " __VA_ARGS__)
+
+/* State of one interrupt: an enable flag and a file descriptor.
+ * NOTE(review): fd is presumably the descriptor used to signal the
+ * interrupt -- confirm against the code that fills it in.
+ */
+struct iavf_emu_intr_info {
+   int enable;
+   int fd;
+};
+
+/* Interrupt table of an emulated iavf device, with room for
+ * RTE_IAVF_EMU_MAX_INTR entries.
+ * NOTE(review): intr_num is presumably the number of entries of info[] in
+ * use -- confirm against the setup code.
+ */
+struct iavf_emu_intr {
+   uint32_t intr_num;
+   struct iavf_emu_intr_info info[RTE_IAVF_EMU_MAX_INTR];
+};
+
+/* Doorbell description of one LAN queue: a pointer to the doorbell and its
+ * size (units not visible here -- TODO confirm).
+ */
+struct iavf_emu_lanQ {
+   uint16_t db_size;
+   void *doorbell;
+};
+
+/* Driver-private state of one emulated iavf device. iavf_emu_dev_close()
+ * obtains it from rte_emudev::priv_data.
+ */
+struct iavf_emudev {
+   struct rte_emudev *edev;
+   /* Maximum LANQ queue pair that this emulated iavf has */
+   uint16_t max_lanqp;
+   /* Maximum LANQ queue pair number that back-end driver can use */
+   uint16_t max_be_lanqp;
+   unsigned int numa_node;
+   char *sock_addr;
+   struct rte_iavf_emu_mem *mem;
+   struct iavf_emu_intr *intr;
+   struct iavf_emu_lanQ *lanq;
+};
+
+void iavf_emu_uninit_device(struct iavf_emudev *dev);
+#endif
diff --git a/drivers/emu/iavf/iavf_emudev.c b/drivers/emu/iavf/iavf_emudev.c
new file mode 100644
index 00..a4cd2deb06
--- /dev/null
+++ b/drivers/emu/iavf/iavf_emudev.c
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include "iavf_emu_internal.h"
+
+#define EMU_IAVF_SOCK_ARG "sock"
+#define EMU_IAVF_QUEUES_ARG "queues"
+
+static const char * const emu_iavf_valid_arg[] = {
+   EMU_IAVF_SOCK_ARG,
+   EMU_IAVF_QUEUES_ARG,
+   NULL
+};
+
+/* Store value through extra_args, which is treated as a
+ * 'const char **'. The string is not copied, only the pointer is saved.
+ * Returns 0 on success or -1 when value is NULL.
+ */
+static inline int
+save_sockaddr(const char *key __rte_unused, const char *value,
+       void *extra_args)
+{
+   const char **dst = extra_args;
+
+   if (value != NULL) {
+       *dst = value;
+       return 0;
+   }
+
+   return -1;
+}
+
+/* Parse value as an unsigned integer (base auto-detected by strtoul) and
+ * store it through extra_args, which is treated as a 'uint16_t *'.
+ *
+ * Returns 0 on success, -EINVAL when value or extra_args is NULL, and -1
+ * when value is not a complete number or does not fit in 16 bits. The
+ * destination is only written on success.
+ */
+static inline int
+save_int(const char *key __rte_unused, const char *value, void *extra_args)
+{
+   uint16_t *n = extra_args;
+   unsigned long parsed;
+   char *end = NULL;
+
+   if (value == NULL || extra_args == NULL)
+       return -EINVAL;
+
+   /* strtoul() only sets errno on failure, so clear any stale value. */
+   errno = 0;
+   parsed = strtoul(value, &end, 0);
+   if (errno != 0 || end == value || *end != '\0')
+       return -1;
+
+   /* Reject values the uint16_t cast would silently truncate. */
+   if (parsed > USHRT_MAX)
+       return -1;
+
+   *n = (uint16_t)parsed;
+   return 0;
+}
+
+static in

[dpdk-dev] [PATCH v2 4/8] emu/iavf: add vfio-user device register and unregister

2020-12-18 Thread Chenbo Xia
This patch adds vfio-user API calls in driver probe and remove.
rte_vfio_user_register() and rte_vfio_user_unregister() are called
to create/destroy a vfio-user device. Notify callbacks that
libvfio_user defines are also implemented.

Signed-off-by: Chenbo Xia 
Signed-off-by: Miao Li 
---
 drivers/emu/iavf/iavf_emu.c  |   3 +-
 drivers/emu/iavf/iavf_emu_internal.h |  19 ++
 drivers/emu/iavf/iavf_emudev.c   |  12 +-
 drivers/emu/iavf/iavf_vfio_user.c| 384 +++
 drivers/emu/iavf/iavf_vfio_user.h|  16 ++
 drivers/emu/iavf/meson.build |   5 +-
 drivers/emu/iavf/rte_iavf_emu.h  |  17 ++
 7 files changed, 452 insertions(+), 4 deletions(-)
 create mode 100644 drivers/emu/iavf/iavf_vfio_user.c
 create mode 100644 drivers/emu/iavf/iavf_vfio_user.h

diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
index 68d2c440e3..dfd9796920 100644
--- a/drivers/emu/iavf/iavf_emu.c
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -2,7 +2,7 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
-#include "iavf_emu_internal.h"
+#include "iavf_vfio_user.h"
 
 static int iavf_emu_dev_close(struct rte_emudev *dev)
 {
@@ -18,6 +18,7 @@ static int iavf_emu_dev_close(struct rte_emudev *dev)
}
 
iavf = (struct iavf_emudev *)dev->priv_data;
+   iavf_emu_unregister_vfio_user(iavf);
iavf_emu_uninit_device(iavf);
dev->priv_data = NULL;
 
diff --git a/drivers/emu/iavf/iavf_emu_internal.h 
b/drivers/emu/iavf/iavf_emu_internal.h
index a726bfe577..10197c00ba 100644
--- a/drivers/emu/iavf/iavf_emu_internal.h
+++ b/drivers/emu/iavf/iavf_emu_internal.h
@@ -17,6 +17,13 @@ extern int emu_iavf_logtype;
 #define EMU_IAVF_LOG(level, ...) \
rte_log(RTE_LOG_ ## level, emu_iavf_logtype, "EMU_IAVF: " __VA_ARGS__)
 
+/* vfio-user side of an emulated iavf device: a device ID plus pointers to
+ * the device info, region and IRQ descriptions.
+ * NOTE(review): dev_id is presumably the ID returned by the vfio-user
+ * library on registration -- confirm against iavf_emu_register_vfio_user().
+ */
+struct iavf_emu_vfio_user {
+   int dev_id;
+   struct vfio_device_info *dev_info;
+   struct rte_vfio_user_regions *reg;
+   struct rte_vfio_user_irq_info *irq;
+};
+
 struct iavf_emu_intr_info {
int enable;
int fd;
@@ -27,6 +34,14 @@ struct iavf_emu_intr {
struct iavf_emu_intr_info info[RTE_IAVF_EMU_MAX_INTR];
 };
 
+/* One admin queue: pointers to 32-bit values holding the low and high
+ * halves of the ring address and the ring size, plus a doorbell pointer
+ * and its size.
+ * NOTE(review): the pointers presumably reference registers inside an
+ * emulated BAR -- confirm against the queue setup code.
+ */
+struct iavf_emu_adminQ {
+   uint32_t *ring_addr_lo;
+   uint32_t *ring_addr_hi;
+   uint32_t *ring_sz;
+   uint16_t db_size;
+   void *doorbell;
+};
+
 struct iavf_emu_lanQ {
uint16_t db_size;
void *doorbell;
@@ -34,14 +49,18 @@ struct iavf_emu_lanQ {
 
 struct iavf_emudev {
struct rte_emudev *edev;
+   struct iavf_emu_vfio_user *vfio;
/* Maximum LANQ queue pair that this emulated iavf has */
uint16_t max_lanqp;
/* Maximum LANQ queue pair number that back-end driver can use */
uint16_t max_be_lanqp;
unsigned int numa_node;
+   int ready;
char *sock_addr;
+   struct rte_iavf_emu_notify_ops *ops;
struct rte_iavf_emu_mem *mem;
struct iavf_emu_intr *intr;
+   struct iavf_emu_adminQ adq[RTE_IAVF_EMU_ADMINQ_NUM];
struct iavf_emu_lanQ *lanq;
 };
 
diff --git a/drivers/emu/iavf/iavf_emudev.c b/drivers/emu/iavf/iavf_emudev.c
index a4cd2deb06..fbbe3d95a7 100644
--- a/drivers/emu/iavf/iavf_emudev.c
+++ b/drivers/emu/iavf/iavf_emudev.c
@@ -6,7 +6,7 @@
 #include 
 #include 
 
-#include "iavf_emu_internal.h"
+#include "iavf_vfio_user.h"
 
 #define EMU_IAVF_SOCK_ARG "sock"
 #define EMU_IAVF_QUEUES_ARG "queues"
@@ -170,10 +170,20 @@ rte_emu_iavf_probe(struct rte_vdev_device *dev)
iavf->max_lanqp = queues;
edev->priv_data = (void *)iavf;
 
+   ret = iavf_emu_register_vfio_user(iavf);
+   if (ret) {
+   EMU_IAVF_LOG(ERR,
+   "Emulated iavf failed to register vfio user.\n");
+   ret = -1;
+   goto err_reg;
+   }
+
edev->started = 1;
rte_kvargs_free(kvlist);
return 0;
 
+err_reg:
+   iavf_emu_uninit_device(iavf);
 err_ndev:
rte_emudev_release(edev);
 err:
diff --git a/drivers/emu/iavf/iavf_vfio_user.c 
b/drivers/emu/iavf/iavf_vfio_user.c
new file mode 100644
index 00..aae47de9f3
--- /dev/null
+++ b/drivers/emu/iavf/iavf_vfio_user.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+
+#include 
+
+#include "iavf_vfio_user.h"
+#include 
+
+/* TAILQ element that links one emudev into the file-scope sock_list. */
+struct iavf_emu_sock_list {
+   TAILQ_ENTRY(iavf_emu_sock_list) next;
+   struct rte_emudev *emu_dev;
+};
+
+TAILQ_HEAD(iavf_emu_sock_list_head, iavf_emu_sock_list);
+
+static struct iavf_emu_sock_list_head sock_list =
+   TAILQ_HEAD_INITIALIZER(sock_list);
+
+static pthread_mutex_t sock_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static int iavf_emu_setup_irq(struct iavf_emudev *dev)
+{
+   struct iavf_emu_intr *intr;
+   struct rte_vfio_user_irq_info *irq;
+   int *fds = NULL;
+   uint32_t i, count;
+
+

[dpdk-dev] [PATCH v2 5/8] emu/iavf: add resource management and internal logic of iavf

2020-12-18 Thread Chenbo Xia
This patch adds the allocation and release of device resources.
Device resources include PCI BARs' memory and interrupt related
resources. Device internal logic is also added.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 drivers/emu/iavf/iavf_emu.c   |   1 +
 drivers/emu/iavf/iavf_emudev.c|  20 +
 drivers/emu/iavf/iavf_vfio_user.c | 669 ++
 drivers/emu/iavf/iavf_vfio_user.h |  41 ++
 drivers/emu/iavf/meson.build  |   8 +
 5 files changed, 739 insertions(+)

diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
index dfd9796920..c1a702d744 100644
--- a/drivers/emu/iavf/iavf_emu.c
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -18,6 +18,7 @@ static int iavf_emu_dev_close(struct rte_emudev *dev)
}
 
iavf = (struct iavf_emudev *)dev->priv_data;
+   iavf_emu_uninit_vfio_user(iavf);
iavf_emu_unregister_vfio_user(iavf);
iavf_emu_uninit_device(iavf);
dev->priv_data = NULL;
diff --git a/drivers/emu/iavf/iavf_emudev.c b/drivers/emu/iavf/iavf_emudev.c
index fbbe3d95a7..70cf558eef 100644
--- a/drivers/emu/iavf/iavf_emudev.c
+++ b/drivers/emu/iavf/iavf_emudev.c
@@ -178,10 +178,30 @@ rte_emu_iavf_probe(struct rte_vdev_device *dev)
goto err_reg;
}
 
+   ret = iavf_emu_init_vfio_user(iavf);
+   if (ret) {
+   EMU_IAVF_LOG(ERR,
+   "Emulated iavf failed to init vfio user.\n");
+   ret = -1;
+   goto err_init;
+   }
+
+   ret = iavf_emu_start_vfio_user(iavf);
+   if (ret) {
+   EMU_IAVF_LOG(ERR,
+   "Emulated iavf failed to start vfio user.\n");
+   ret = -1;
+   goto err_start;
+   }
+
edev->started = 1;
rte_kvargs_free(kvlist);
return 0;
 
+err_start:
+   iavf_emu_uninit_vfio_user(iavf);
+err_init:
+   iavf_emu_unregister_vfio_user(iavf);
 err_reg:
iavf_emu_uninit_device(iavf);
 err_ndev:
diff --git a/drivers/emu/iavf/iavf_vfio_user.c 
b/drivers/emu/iavf/iavf_vfio_user.c
index aae47de9f3..a4208de618 100644
--- a/drivers/emu/iavf/iavf_vfio_user.c
+++ b/drivers/emu/iavf/iavf_vfio_user.c
@@ -2,13 +2,36 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 #include 
+#include 
 
 #include 
+#include 
+#include 
 
 #include "iavf_vfio_user.h"
 #include 
 
+/* Store val through addr as a 16-bit / 32-bit value. Both arguments are
+ * parenthesized so that expression arguments (e.g. base + offset) expand
+ * with the intended precedence.
+ * NOTE(review): despite the LE in the names, no byte-order conversion is
+ * performed -- confirm these are only used on little-endian hosts.
+ */
+#define STORE_LE16(addr, val)   (*(__u16 *)(addr) = (val))
+#define STORE_LE32(addr, val)   (*(__u32 *)(addr) = (val))
+
+#define IAVF_EMU_BAR0_SIZE 0x1
+#define IAVF_EMU_BAR3_SIZE 0x1000
+#define IAVF_EMU_BAR_SIZE_MASK 0x
+#define IAVF_EMU_BAR_MASK(sz) (~(sz) + 1)
+#define IAVF_EMU_MSIX_TABLE_SIZE 0x5
+
+#define PCI_VENDOR_ID_INTEL 0x8086
+#define PCI_SUBDEVICE_ID 0x1100
+#define PCI_CLASS_ETHERNET 0x0200
+
 struct iavf_emu_sock_list {
TAILQ_ENTRY(iavf_emu_sock_list) next;
struct rte_emudev *emu_dev;
@@ -174,6 +197,14 @@ static int iavf_emu_setup_queues(struct iavf_emudev *dev)
return 0;
 }
 
+/* Release the queue resources of dev: zero the admin queue state and free
+ * the LAN queue array. lanq is reset to NULL afterwards so that a repeated
+ * cleanup or any later access cannot touch the freed memory.
+ */
+static inline void iavf_emu_cleanup_queues(struct iavf_emudev *dev)
+{
+   /* adq is an in-struct array, so sizeof covers all admin queues. */
+   memset(&dev->adq, 0, sizeof(dev->adq));
+
+   rte_free(dev->lanq);
+   dev->lanq = NULL;
+}
+
 static inline void iavf_emu_reset_queues(struct iavf_emudev *dev)
 {
memset(&dev->adq, 0, RTE_IAVF_EMU_ADMINQ_NUM *
@@ -191,6 +222,576 @@ static void iavf_emu_reset_all_resources(struct 
iavf_emudev *dev)
iavf_emu_reset_regions(dev);
 }
 
+static int iavf_emu_init_dev(struct iavf_emudev *dev)
+{
+   struct iavf_emu_vfio_user *vfio;
+   struct vfio_device_info *dev_info;
+   struct rte_vfio_user_regions *reg;
+   struct rte_vfio_user_irq_info *irq;
+   struct vfio_region_info_cap_sparse_mmap *sparse;
+   int ret;
+   uint32_t i, j;
+
+   vfio = rte_zmalloc_socket("vfio", sizeof(*vfio), 0, dev->numa_node);
+   if (!vfio) {
+   EMU_IAVF_LOG(ERR, "Failed to alloc iavf_emu_vfio_user\n");
+   ret = -1;
+   goto exit;
+   }
+
+   dev_info = rte_zmalloc_socket("vfio_dev_info",
+   sizeof(*dev_info), 0, dev->numa_node);
+   if (!dev_info) {
+   EMU_IAVF_LOG(ERR, "Failed to alloc vfio dev_info\n");
+   ret = -1;
+   goto err_info;
+   }
+   dev_info->argsz = sizeof(*dev_info);
+   dev_info->flags = VFIO_DEVICE_FLAGS_PCI | VFIO_DEVICE_FLAGS_RESET;
+   dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+   dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+
+   reg = rte_zmalloc_socket("vfio_user_regions",
+   sizeof(*reg) + dev_info->num_regions *
+   sizeof(struct rte_vfio_user_reg_info), 0, dev->numa_node);
+   if (!reg) {
+   EMU_IAVF_LOG(ERR, &q

[dpdk-dev] [PATCH v2 6/8] emu/iavf: add emudev operations to fit in emudev framework

2020-12-18 Thread Chenbo Xia
This patch implements emudev operations to make emulated iavf
fit into rte_emudev framework. Lifecycle related and device
resource related operations are both implemented.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 drivers/emu/iavf/iavf_emu.c | 218 
 drivers/emu/iavf/rte_iavf_emu.h |  59 +
 2 files changed, 277 insertions(+)

diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
index c1a702d744..9ad371ca98 100644
--- a/drivers/emu/iavf/iavf_emu.c
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -2,7 +2,72 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
 #include "iavf_vfio_user.h"
+#include 
+
+/* Start operation: invoke the registered device_start callback, if any.
+ * Always returns 0.
+ */
+static int iavf_emu_dev_start(struct rte_emudev *dev)
+{
+   struct iavf_emudev *priv = (struct iavf_emudev *)dev->priv_data;
+
+   if (priv->ops == NULL || priv->ops->device_start == NULL)
+       return 0;
+
+   priv->ops->device_start(dev);
+   return 0;
+}
+
+/* Stop operation: invoke the registered device_stop callback, if any. */
+static void iavf_emu_dev_stop(struct rte_emudev *dev)
+{
+   struct iavf_emudev *priv = (struct iavf_emudev *)dev->priv_data;
+
+   if (priv->ops == NULL)
+       return;
+   if (priv->ops->device_stop == NULL)
+       return;
+
+   priv->ops->device_stop(dev);
+}
+
+/* Configure operation: record the queue pair number the back-end may use.
+ *
+ * dev_conf->dev_priv is interpreted as a struct rte_iavf_emu_config.
+ * The checks run in this order and the first failure decides the result:
+ *   -EINVAL  dev_conf->dev_priv is NULL
+ *   -ENOTSUP max_qp_num or region_num differs from dev->dev_info
+ *   -EINVAL  qp_num exceeds RTE_MAX_QUEUES_PER_PORT or
+ *            RTE_IAVF_EMU_MAX_QP_NUM
+ *   -EPERM   dev->backend_priv is set
+ * On success qp_num is stored in max_be_lanqp and 0 is returned.
+ */
+static int iavf_emu_dev_configure(struct rte_emudev *dev,
+   struct rte_emudev_info *dev_conf)
+{
+   struct iavf_emudev *iavf = (struct iavf_emudev *)dev->priv_data;
+   struct rte_iavf_emu_config *conf =
+   (struct rte_iavf_emu_config *)dev_conf->dev_priv;
+
+   if (!dev_conf->dev_priv)
+   return -EINVAL;
+
+   /* Currently emulated iavf does not support max_qp_num
+* and region num configuration
+*/
+   if (dev->dev_info.max_qp_num != dev_conf->max_qp_num ||
+   dev->dev_info.region_num != dev_conf->region_num) {
+   EMU_IAVF_LOG(ERR,
+   "Configure max_qp_num/region num not supported\n");
+   return -ENOTSUP;
+   }
+
+   if (conf->qp_num >  RTE_MAX_QUEUES_PER_PORT ||
+   conf->qp_num > RTE_IAVF_EMU_MAX_QP_NUM) {
+   EMU_IAVF_LOG(ERR, "Queue pair num exceeds max\n");
+   return -EINVAL;
+   }
+
+   /* For now, we don't support device configure when data
+* path driver is attached
+*/
+   if (dev->backend_priv) {
+   EMU_IAVF_LOG(ERR, "Configure failed because of "
+   "data path attached\n");
+   return -EPERM;
+   }
+
+   iavf->max_be_lanqp = conf->qp_num;
+   return 0;
+}
 
 static int iavf_emu_dev_close(struct rte_emudev *dev)
 {
@@ -26,6 +91,159 @@ static int iavf_emu_dev_close(struct rte_emudev *dev)
return 0;
 }
 
+/* Report the device configuration: info is treated as a
+ * struct rte_iavf_emu_config and its qp_num is set to max_be_lanqp.
+ * Returns 0 on success or -EINVAL when info is NULL.
+ */
+static int iavf_emu_get_dev_info(struct rte_emudev *dev,
+       rte_emudev_obj_t info)
+{
+   struct iavf_emudev *priv = (struct iavf_emudev *)dev->priv_data;
+   struct rte_iavf_emu_config *out;
+
+   if (info == NULL)
+       return -EINVAL;
+
+   out = (struct rte_iavf_emu_config *)info;
+   out->qp_num = priv->max_be_lanqp;
+   return 0;
+}
+
+/* Hand out the device's memory table pointer through tb. Always returns 0;
+ * neither dev nor tb is checked for NULL.
+ */
+static int iavf_emu_get_mem_table(struct rte_emudev *dev,
+       rte_emudev_mem_table_t *tb)
+{
+   *tb = ((struct iavf_emudev *)dev->priv_data)->mem;
+   return 0;
+}
+
+/* Fill info for the queue with index queue.
+ *
+ * For admin queues (queue < RTE_IAVF_EMU_ADMINQ_NUM) the base address is
+ * combined from the values behind ring_addr_hi/ring_addr_lo and the size is
+ * read through ring_sz; -1 is returned if any of those pointers is NULL.
+ * For every other queue base and size are reported as 0 and irq_vector as
+ * -1. doorbell_id is always the queue index. Returns 0 on success.
+ *
+ * NOTE(review): irq_vector is computed as queue - 1, so admin queue 0 gets
+ * -1 (or a wrapped value if the field is unsigned) and admin queue 1 gets
+ * vector 0 -- confirm this matches the intended TX/RX admin queue order.
+ */
+static int iavf_emu_get_queue_info(struct rte_emudev *dev, uint32_t queue,
+   struct rte_emudev_q_info *info)
+{
+   struct iavf_emudev *iavf = (struct iavf_emudev *)dev->priv_data;
+
+   if (queue < RTE_IAVF_EMU_ADMINQ_NUM) {
+   struct iavf_emu_adminQ *adq = &iavf->adq[queue];
+   uint64_t base, size;
+
+   if (adq->ring_addr_lo == NULL ||
+   adq->ring_addr_hi == NULL ||
+   adq->ring_sz == NULL)
+   return -1;
+   base = RTE_IAVF_EMU_32_TO_64(*adq->ring_addr_hi,
+   *adq->ring_addr_lo);
+   size = *adq->ring_sz;
+   info->base = base;
+   info->size = size;
+   info->doorbell_id = queue;
+   /* RX AdminQ should have IRQ vector 0 */
+   info->irq_vector = queue - 1;
+   } else {
+   info->base = 0;
+   info->size = 0;
+   info->doorbell_id = queue;
+   info->irq_vector = -1;
+   }
+
+   return 0;
+}
+
+static int iavf_emu_get_irq_info(struct rte_emudev *dev, uint32_t vector,
+   struct rte_emudev_irq_info *info)
+{
+   struct iavf_emudev *iavf = (struct iavf_emudev *)dev->priv_data;
+   struct iavf_emu_intr *intr = iavf->intr;
+   struct iavf_emu_intr_info *intr_info = &intr->info[vector];
+
+   i

[dpdk-dev] [PATCH v2 7/8] test/emudev: introduce functional test

2020-12-18 Thread Chenbo Xia
This patch introduces functional test for emudev. The
implementation of iavf emudev selftest is also added.

Signed-off-by: Miao Li 
Signed-off-by: Chenbo Xia 
---
 app/test/meson.build |   5 +-
 app/test/test_emudev.c   |  29 +
 drivers/emu/iavf/iavf_emu.c  |   1 +
 drivers/emu/iavf/iavf_emu_internal.h |   1 +
 drivers/emu/iavf/iavf_emu_test.c | 174 +++
 drivers/emu/iavf/meson.build |   2 +-
 6 files changed, 210 insertions(+), 2 deletions(-)
 create mode 100644 app/test/test_emudev.c
 create mode 100644 drivers/emu/iavf/iavf_emu_test.c

diff --git a/app/test/meson.build b/app/test/meson.build
index f5b15ac44c..b8b79bbc8b 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -139,6 +139,7 @@ test_sources = files('commands.c',
'test_trace_register.c',
'test_trace_perf.c',
'test_vfio_user.c',
+   'test_emudev.c',
'test_version.c',
'virtual_pmd.c'
 )
@@ -176,7 +177,8 @@ test_deps = ['acl',
'stack',
'vfio_user',
'telemetry',
-   'timer'
+   'timer',
+   'emudev'
 ]
 
 # Each test is marked with flag true/false
@@ -327,6 +329,7 @@ driver_test_names = [
 'eventdev_selftest_octeontx',
 'eventdev_selftest_sw',
 'rawdev_autotest',
+'emudev_autotest',
 ]
 
 dump_test_names = [
diff --git a/app/test/test_emudev.c b/app/test/test_emudev.c
new file mode 100644
index 00..0dfce162ed
--- /dev/null
+++ b/app/test/test_emudev.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+
+#include "test.h"
+
+static int
+test_emudev_selftest_impl(const char *pmd, const char *opts)
+{
+   int ret = 0;
+
+   if (rte_emudev_get_dev_id(pmd) == -ENODEV)
+   ret = rte_vdev_init(pmd, opts);
+   if (ret)
+   return TEST_SKIPPED;
+
+   return rte_emudev_selftest(rte_emudev_get_dev_id(pmd));
+}
+
+static int
+test_emudev_selftest(void)
+{
+   return test_emudev_selftest_impl("emu_iavf", "sock=/tmp/sock1");
+}
+
+REGISTER_TEST_COMMAND(emudev_autotest, test_emudev_selftest);
diff --git a/drivers/emu/iavf/iavf_emu.c b/drivers/emu/iavf/iavf_emu.c
index 9ad371ca98..88bf2bdf94 100644
--- a/drivers/emu/iavf/iavf_emu.c
+++ b/drivers/emu/iavf/iavf_emu.c
@@ -246,4 +246,5 @@ struct rte_emudev_ops emu_iavf_ops = {
.subscribe_event = iavf_emu_subs_ev,
.unsubscribe_event = iavf_emu_unsubs_ev,
.get_attr = iavf_emu_get_attr,
+   .dev_selftest = iavf_emu_selftest,
 };
diff --git a/drivers/emu/iavf/iavf_emu_internal.h 
b/drivers/emu/iavf/iavf_emu_internal.h
index 10197c00ba..1ac7f96566 100644
--- a/drivers/emu/iavf/iavf_emu_internal.h
+++ b/drivers/emu/iavf/iavf_emu_internal.h
@@ -65,4 +65,5 @@ struct iavf_emudev {
 };
 
 void iavf_emu_uninit_device(struct iavf_emudev *dev);
+int iavf_emu_selftest(uint16_t dev_id);
 #endif
diff --git a/drivers/emu/iavf/iavf_emu_test.c b/drivers/emu/iavf/iavf_emu_test.c
new file mode 100644
index 00..ad19134724
--- /dev/null
+++ b/drivers/emu/iavf/iavf_emu_test.c
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "iavf_emu_internal.h"
+
+#define TEST_DEV_NAME "emu_iavf"
+#define TEST_SUCCESS 0
+#define TEST_FAILED -1
+
+#define EMUDEV_TEST_RUN(setup, teardown, test) \
+   emudev_test_run(setup, teardown, test, #test)
+static uint16_t test_dev_id;
+static int total;
+static int passed;
+static int failed;
+static int unsupported;
+
+static int
+testsuite_setup(void)
+{
+   uint8_t count;
+   count = rte_emudev_count();
+   if (!count) {
+   EMU_IAVF_LOG(INFO, "No existing emu dev; "
+ "Creating emu_iavf\n");
+   return rte_vdev_init(TEST_DEV_NAME, NULL);
+   }
+
+   return TEST_SUCCESS;
+}
+
+static void
+testsuite_teardown(void)
+{
+   rte_vdev_uninit(TEST_DEV_NAME);
+}
+
+static void emudev_test_run(int (*setup)(void),
+void (*teardown)(void),
+int (*test)(void),
+const char *name)
+{
+   int ret = 0;
+
+   if (setup) {
+   ret = setup();
+   if (ret < 0) {
+   EMU_IAVF_LOG(INFO, "Error setting up test %s\n", name);
+   unsupported++;
+   }
+   }
+
+   if (test) {
+   ret = test();
+   if (ret < 0) {
+   failed++;
+   EMU_IAVF_LOG(I

[dpdk-dev] [PATCH v2 8/8] doc: update release notes for iavf emudev driver

2020-12-18 Thread Chenbo Xia
Update release notes for emulated iavf driver.

Signed-off-by: Chenbo Xia 
---
 doc/guides/rel_notes/release_21_02.rst | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_21_02.rst 
b/doc/guides/rel_notes/release_21_02.rst
index 3d26b6b580..b310b67b7d 100644
--- a/doc/guides/rel_notes/release_21_02.rst
+++ b/doc/guides/rel_notes/release_21_02.rst
@@ -67,7 +67,7 @@ New Features
 
   See :doc:`../prog_guide/vfio_user_lib` for more information.
 
-* **Added emudev Library.**
+* **Added emudev Library and first emudev driver.**
 
   Added an experimental library ``librte_emudev`` to provide device abstraction
   for an emulated device.
@@ -77,6 +77,10 @@ New Features
   crypto and etc.). It can be attached to another data path driver (e.g, ethdev
   driver) to leverage the high performance of DPDK data path driver.
 
+  The first emudev driver is also introduced, which emulates software iavf
+  devices. This driver emulates all iavf device behavior except data path
+  handling.
+
   See :doc:`../prog_guide/emudev` for more information.
 
 Removed Items
-- 
2.17.1



[dpdk-dev] [PATCH v2 0/9] Introduce vfio-user library

2021-01-13 Thread Chenbo Xia
This series enables DPDK to be an alternative I/O device emulation library for
building virtualized devices in separate processes outside QEMU. It introduces
a new library for device emulation (librte_vfio_user).

*librte_vfio_user* library is an implementation of VFIO-over-socket[1] (also
known as vfio-user) which is a protocol that allows a device to be virtualized
in a separate process outside of QEMU. 

Background & Motivation 
---
The disaggregated/multi-process QEMU is using VFIO-over-socket/vfio-user
as the main transport mechanism to disaggregate IO services from QEMU[2].
Vfio-user essentially implements the VFIO device model presented to the
user process by a set of messages over a unix-domain socket. The main
difference between application using vfio-user and application using vfio
kernel module is that device manipulation is based on socket messages for
vfio-user but system calls for vfio kernel module. The vfio-user devices
consist of a generic VFIO device type, living in QEMU, which is called the
client[3], and the core device implementation (emulated device), living
outside of QEMU, which is called the server. With emulated devices moved
out of QEMU by the vfio-user implementation, other places are needed to
host virtualized/emulated devices. This series introduces vfio-user support
in DPDK so that DPDK can be one of the places, besides QEMU, where emulated
devices live.

This series introduces the server and client implementations of the vfio-user
protocol. The server plays the role of the emulated device and the client is
the device consumer. With this implementation, DPDK can act as both device
provider and consumer.

Design overview
---

+--------------+     +--------------+
| +----------+ |     | +----------+ |
| | Generic  | |     | | Emulated | |
| | vfio-dev | |     | | device   | |
| +----------+ |     | +----|-----+ |
| +----------+ |     | +----|-----+ |
| | vfio-user| |     | | vfio-user| |
| | client   | |<--->| | server   | |
| +----------+ |     | +----------+ |
| QEMU/DPDK    |     | DPDK         |
+--------------+     +--------------+

- Generic vfio-dev. 
  It is the generic vfio framework in vfio applications like QEMU or DPDK.
  Applications can keep the most of vfio device management and plug in a
  vfio-user device type. Note that in current implementation, we have not
  yet integrated client vfio-user into kernel vfio in DPDK but it is viable
  and good to do so.

- vfio-user client.
  For DPDK, it is part of librte_vfio_user implementation to provide ways to
  manipulate vfio-user based emulated devices. This manipulation is very
  similar to kernel vfio (i.e., syscalls like ioctl, mmap and pread/pwrite).
  It is a base for vfio-user device consumer.

- vfio-user server. 
  It is the server part of librte_vfio_user. It provides ways to emulate your own
  device. A device provider only needs to care about the device layout that VFIO
  defines, and does not need to know how it communicates with the vfio-user client.

- Emulated device.
  It is an emulated device of any type (e.g., network, crypto, etc.).

References
--
[1]: https://patchew.org/QEMU/20201130161229.23164-1-thanos.maka...@nutanix.com/
[2]: https://wiki.qemu.org/Features/MultiProcessQEMU
[3]: https://github.com/oracle/qemu/tree/vfio-user-v0.2

--
v2:
 - Clean up non-static inline function (Stephen)
 - Naturally pack vfio-user message payload and header (Stephen)
 - Make function definition align with coding style (Beilei)
 - Clean up duplicate code in vfio-user server APIs (Beilei)
 - Fix some typos

Chenbo Xia (9):
  lib: introduce vfio-user library
  vfio_user: implement lifecycle related APIs
  vfio_user: implement device and region related APIs
  vfio_user: implement DMA table and socket address API
  vfio_user: implement interrupt related APIs
  vfio_user: add client APIs of device attach/detach
  vfio_user: add client APIs of DMA/IRQ/region
  test/vfio_user: introduce functional test
  doc: add vfio-user library guide

 MAINTAINERS |4 +
 app/test/meson.build|4 +
 app/test/test_vfio_user.c   |  665 ++
 doc/guides/prog_guide/index.rst |1 +
 doc/guides/prog_guide/vfio_user_lib.rst |  215 +++
 doc/guides/rel_notes/release_21_02.rst  |   11 +
 lib/librte_vfio_user/meson.build|   11 +
 lib/librte_vfio_user/rte_vfio_user.h|  446 +++
 lib/librte_vfio_user/version.map|   26 +
 lib/librte_vfio_user/vfio_user_base.c   |  223 
 lib/librte_vfio_user/vfio_user_base.h   |  109 ++
 lib/librte_vfio_user/vfio_user_client.c |  700 ++
 lib/librte_vfio_user/vfio_user_client.h |   26 +
 lib/librte_vfio_user/vfio_user_server.c | 1593 +++
 lib/librte_vfio_user/vfio_user_server.h |   66 +
 lib/meson.build |1 +
 16 files changed, 4101 insertions(+)
 create mode 10064

[dpdk-dev] [PATCH v2 1/9] lib: introduce vfio-user library

2021-01-13 Thread Chenbo Xia
This patch introduces vfio-user library, which follows vfio-user
protocol v1.0. As vfio-user has server and client implementation,
this patch introduces basic structures and internal functions that
will be used by both server and client.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 MAINTAINERS   |   4 +
 lib/librte_vfio_user/meson.build  |   9 ++
 lib/librte_vfio_user/version.map  |   3 +
 lib/librte_vfio_user/vfio_user_base.c | 211 ++
 lib/librte_vfio_user/vfio_user_base.h |  65 
 lib/meson.build   |   1 +
 6 files changed, 293 insertions(+)
 create mode 100644 lib/librte_vfio_user/meson.build
 create mode 100644 lib/librte_vfio_user/version.map
 create mode 100644 lib/librte_vfio_user/vfio_user_base.c
 create mode 100644 lib/librte_vfio_user/vfio_user_base.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 6787b15dcc..91b8b2ccc1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1541,6 +1541,10 @@ M: Nithin Dabilpuram 
 M: Pavan Nikhilesh 
 F: lib/librte_node/
 
+Vfio-user - EXPERIMENTAL
+M: Chenbo Xia 
+M: Xiuchun Lu 
+F: lib/librte_vfio_user/
 
 Test Applications
 -
diff --git a/lib/librte_vfio_user/meson.build b/lib/librte_vfio_user/meson.build
new file mode 100644
index 00..0f6407b80f
--- /dev/null
+++ b/lib/librte_vfio_user/meson.build
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Intel Corporation
+
+if not is_linux
+   build = false
+   reason = 'only supported on Linux'
+endif
+
+sources = files('vfio_user_base.c')
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
new file mode 100644
index 00..33c1b976f1
--- /dev/null
+++ b/lib/librte_vfio_user/version.map
@@ -0,0 +1,3 @@
+EXPERIMENTAL {
+   local: *;
+};
diff --git a/lib/librte_vfio_user/vfio_user_base.c 
b/lib/librte_vfio_user/vfio_user_base.c
new file mode 100644
index 00..b9fdff5b02
--- /dev/null
+++ b/lib/librte_vfio_user/vfio_user_base.c
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include "vfio_user_base.h"
+
+int vfio_user_log_level;
+
+const char *vfio_user_msg_str[VFIO_USER_MAX] = {
+   [VFIO_USER_NONE] = "VFIO_USER_NONE",
+   [VFIO_USER_VERSION] = "VFIO_USER_VERSION",
+};
+
+void
+vfio_user_close_msg_fds(struct vfio_user_msg *msg)
+{
+   int i;
+
+   for (i = 0; i < msg->fd_num; i++)
+   close(msg->fds[i]);
+}
+
+int
+vfio_user_check_msg_fdnum(struct vfio_user_msg *msg, int expected_fds)
+{
+   if (msg->fd_num == expected_fds)
+   return 0;
+
+   VFIO_USER_LOG(ERR, "Expect %d FDs for request %s, received %d\n",
+   expected_fds, vfio_user_msg_str[msg->cmd], msg->fd_num);
+
+   vfio_user_close_msg_fds(msg);
+
+   return -1;
+}
+
+static int
+vfio_user_recv_fd_msg(int sockfd, char *buf, int buflen, int *fds,
+   int max_fds, int *fd_num)
+{
+   struct iovec iov;
+   struct msghdr msgh;
+   char control[CMSG_SPACE(max_fds * sizeof(int))];
+   struct cmsghdr *cmsg;
+   int fd_sz, got_fds = 0;
+   int ret, i;
+
+   *fd_num = 0;
+
+   memset(&msgh, 0, sizeof(msgh));
+   iov.iov_base = buf;
+   iov.iov_len  = buflen;
+
+   msgh.msg_iov = &iov;
+   msgh.msg_iovlen = 1;
+   msgh.msg_control = control;
+   msgh.msg_controllen = sizeof(control);
+
+   ret = recvmsg(sockfd, &msgh, 0);
+   if (ret <= 0) {
+   if (ret)
+   VFIO_USER_LOG(DEBUG, "recvmsg failed\n");
+   return ret;
+   }
+
+   if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+   VFIO_USER_LOG(ERR, "Message is truncated\n");
+   return -1;
+   }
+
+   for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+   cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+   if ((cmsg->cmsg_level == SOL_SOCKET) &&
+   (cmsg->cmsg_type == SCM_RIGHTS)) {
+   fd_sz = cmsg->cmsg_len - CMSG_LEN(0);
+   got_fds = fd_sz / sizeof(int);
+   if (got_fds >= max_fds) {
+   /* Invalid message, close fds */
+   int *close_fd = (int *)CMSG_DATA(cmsg);
+   for (i = 0; i < got_fds; i++) {
+   close_fd += i;
+   close(*close_fd);
+   }
+   VFIO_USER_LOG(ERR, "fd num exceeds max "
+   "in vfio-user msg\n");
+   return -1;
+   }
+   *fd_num = g

[dpdk-dev] [PATCH v2 2/9] vfio_user: implement lifecycle related APIs

2021-01-13 Thread Chenbo Xia
This patch implements three lifecycle related APIs for vfio-user server,
which are rte_vfio_user_register(), rte_vfio_user_unregister() and
rte_vfio_user_start(). Socket and device management is implemented
along with the API introduction.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/meson.build|   3 +-
 lib/librte_vfio_user/rte_vfio_user.h|  54 ++
 lib/librte_vfio_user/version.map|   6 +
 lib/librte_vfio_user/vfio_user_base.h   |   4 +
 lib/librte_vfio_user/vfio_user_server.c | 707 
 lib/librte_vfio_user/vfio_user_server.h |  55 ++
 6 files changed, 828 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_vfio_user/rte_vfio_user.h
 create mode 100644 lib/librte_vfio_user/vfio_user_server.c
 create mode 100644 lib/librte_vfio_user/vfio_user_server.h

diff --git a/lib/librte_vfio_user/meson.build b/lib/librte_vfio_user/meson.build
index 0f6407b80f..b7363f61c6 100644
--- a/lib/librte_vfio_user/meson.build
+++ b/lib/librte_vfio_user/meson.build
@@ -6,4 +6,5 @@ if not is_linux
reason = 'only supported on Linux'
 endif
 
-sources = files('vfio_user_base.c')
+sources = files('vfio_user_base.c', 'vfio_user_server.c')
+headers = files('rte_vfio_user.h')
diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
new file mode 100644
index 00..705a2f6632
--- /dev/null
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_VFIO_USER_H
+#define _RTE_VFIO_USER_H
+
+#include 
+
+/**
+ *  Below APIs are for vfio-user server (device provider) to use:
+ * *rte_vfio_user_register
+ * *rte_vfio_user_unregister
+ * *rte_vfio_user_start
+ */
+
+/**
+ * Register a vfio-user device.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vfio_user_register(const char *sock_addr);
+
+/**
+ * Unregister a vfio-user device.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vfio_user_unregister(const char *sock_addr);
+
+/**
+ * Start vfio-user handling for the device.
+ *
+ * This function triggers vfio-user message handling.
+ * @param sock_addr
+ *   Unix domain socket address
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vfio_user_start(const char *sock_addr);
+
+#endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index 33c1b976f1..e53095eda8 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -1,3 +1,9 @@
 EXPERIMENTAL {
+   global:
+
+   rte_vfio_user_register;
+   rte_vfio_user_unregister;
+   rte_vfio_user_start;
+
local: *;
 };
diff --git a/lib/librte_vfio_user/vfio_user_base.h 
b/lib/librte_vfio_user/vfio_user_base.h
index 34106cc606..f9b0b94665 100644
--- a/lib/librte_vfio_user/vfio_user_base.h
+++ b/lib/librte_vfio_user/vfio_user_base.h
@@ -7,6 +7,10 @@
 
 #include 
 
+#include "rte_vfio_user.h"
+
+#define VFIO_USER_VERSION_MAJOR 1
+#define VFIO_USER_VERSION_MINOR 0
 #define VFIO_USER_MAX_FD 1024
 #define VFIO_USER_MAX_VERSION_DATA 512
 
diff --git a/lib/librte_vfio_user/vfio_user_server.c 
b/lib/librte_vfio_user/vfio_user_server.c
new file mode 100644
index 00..35544c819a
--- /dev/null
+++ b/lib/librte_vfio_user/vfio_user_server.c
@@ -0,0 +1,707 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vfio_user_server.h"
+
+#define MAX_VFIO_USER_DEVICE 1024
+
+static struct vfio_user_server *vfio_user_devices[MAX_VFIO_USER_DEVICE];
+static pthread_mutex_t vfio_dev_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static struct vfio_user_ep_sock vfio_ep_sock = {
+   .ep = {
+   .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
+   .fd_num = 0
+   },
+   .sock_num = 0,
+   .mutex = PTHREAD_MUTEX_INITIALIZER,
+};
+
+static int
+vfio_user_negotiate_version(struct vfio_user_server *dev,
+   struct vfio_user_msg *msg)
+{
+   struct vfio_user_version *ver = &msg->payload.ver;
+
+   if (vfio_user_check_msg_fdnum(msg, 0) != 0)
+   return -EINVAL;
+
+   if (ver->major == dev->ver.major && ver->minor <= dev->ver.minor)
+   return 0;
+   else
+   return -ENOTSUP;
+}
+
+static vfio_user_msg_handler_t vfio_user_msg_handlers[VFIO_USER_MAX] = {
+   [VFIO_USER_NONE] = NULL,
+   [VFIO_USER_VERSION] = vfio_user_negotiate_version,
+};
+
+static struct vfio_user_server_socket *
+vfio_user_find_socket(const char *sock_addr)
+{
+   uint32_t i;
+
+   if (sock_addr == NULL)
+  

[dpdk-dev] [PATCH v2 3/9] vfio_user: implement device and region related APIs

2021-01-13 Thread Chenbo Xia
This patch introduces device and region related APIs, which are
rte_vfio_user_set_dev_info() and rte_vfio_user_set_reg_info().
The corresponding vfio-user command handling is also added with
the definition of all vfio-user command identity.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/rte_vfio_user.h|  62 +++
 lib/librte_vfio_user/version.map|   2 +
 lib/librte_vfio_user/vfio_user_base.c   |  12 ++
 lib/librte_vfio_user/vfio_user_base.h   |  32 +++-
 lib/librte_vfio_user/vfio_user_server.c | 235 
 lib/librte_vfio_user/vfio_user_server.h |   2 +
 6 files changed, 344 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
index 705a2f6632..117e994cc6 100644
--- a/lib/librte_vfio_user/rte_vfio_user.h
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -5,13 +5,35 @@
 #ifndef _RTE_VFIO_USER_H
 #define _RTE_VFIO_USER_H
 
+#include 
+
 #include 
 
+struct rte_vfio_user_reg_info;
+
+typedef ssize_t (*rte_vfio_user_reg_acc_t)(struct rte_vfio_user_reg_info *reg,
+   char *buf, size_t count, loff_t pos, bool iswrite);
+
+struct rte_vfio_user_reg_info {
+   rte_vfio_user_reg_acc_t rw;
+   void *base;
+   int fd;
+   struct vfio_region_info *info;
+   void *priv;
+};
+
+struct rte_vfio_user_regions {
+   uint32_t reg_num;
+   struct rte_vfio_user_reg_info reg_info[];
+};
+
 /**
  *  Below APIs are for vfio-user server (device provider) to use:
  * *rte_vfio_user_register
  * *rte_vfio_user_unregister
  * *rte_vfio_user_start
+ * *rte_vfio_user_set_dev_info
+ * *rte_vfio_user_set_reg_info
  */
 
 /**
@@ -51,4 +73,44 @@ __rte_experimental
 int
 rte_vfio_user_start(const char *sock_addr);
 
+/**
+ * Set the device information for a vfio-user device.
+ *
+ * This information must be set before calling rte_vfio_user_start, and should
+ * not be updated after start. Update after start can be done by unregistration
+ * and re-registration, and then the device-level change can be detected by
+ * vfio-user client.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @param dev_info
+ *   Device information for the vfio-user device
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vfio_user_set_dev_info(const char *sock_addr,
+   struct vfio_device_info *dev_info);
+
+/**
+ * Set the region information for a vfio-user device.
+ *
+ * This information must be set before calling rte_vfio_user_start, and should
+ * not be updated after start. Update after start can be done by unregistration
+ * and re-registration, and then the device-level change can be detected by
+ * vfio-user client.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @param reg
+ *   Region information for the vfio-user device
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vfio_user_set_reg_info(const char *sock_addr,
+   struct rte_vfio_user_regions *reg);
+
 #endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index e53095eda8..0f4f5acba5 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -4,6 +4,8 @@ EXPERIMENTAL {
rte_vfio_user_register;
rte_vfio_user_unregister;
rte_vfio_user_start;
+   rte_vfio_user_set_dev_info;
+   rte_vfio_user_set_reg_info;
 
local: *;
 };
diff --git a/lib/librte_vfio_user/vfio_user_base.c 
b/lib/librte_vfio_user/vfio_user_base.c
index b9fdff5b02..7badca23b7 100644
--- a/lib/librte_vfio_user/vfio_user_base.c
+++ b/lib/librte_vfio_user/vfio_user_base.c
@@ -13,6 +13,18 @@ int vfio_user_log_level;
 const char *vfio_user_msg_str[VFIO_USER_MAX] = {
[VFIO_USER_NONE] = "VFIO_USER_NONE",
[VFIO_USER_VERSION] = "VFIO_USER_VERSION",
+   [VFIO_USER_DMA_MAP] = "VFIO_USER_DMA_MAP",
+   [VFIO_USER_DMA_UNMAP] = "VFIO_USER_DMA_UNMAP",
+   [VFIO_USER_DEVICE_GET_INFO] = "VFIO_USER_DEVICE_GET_INFO",
+   [VFIO_USER_DEVICE_GET_REGION_INFO] = "VFIO_USER_GET_REGION_INFO",
+   [VFIO_USER_DEVICE_GET_IRQ_INFO] = "VFIO_USER_DEVICE_GET_IRQ_INFO",
+   [VFIO_USER_DEVICE_SET_IRQS] = "VFIO_USER_DEVICE_SET_IRQS",
+   [VFIO_USER_REGION_READ] = "VFIO_USER_REGION_READ",
+   [VFIO_USER_REGION_WRITE] = "VFIO_USER_REGION_WRITE",
+   [VFIO_USER_DMA_READ] = "VFIO_USER_DMA_READ",
+   [VFIO_USER_DMA_WRITE] = "VFIO_USER_DMA_WRITE",
+   [VFIO_USER_VM_INTERRUPT] = "VFIO_USER_VM_INTERRUPT",
+   [VFIO_USER_DEVICE_RESET] = "VFIO_USER_DEVICE_RESET",
 };
 
 void
diff --git a/lib/librte_vfio_user/vfio_user_base.h 
b/lib/librte_vfio_user/vfio_user_base.h
index f9b0b94665..f92886b56a 100644
--- a/lib/librte_vfio_user/vfio_user_base.h
+++ b/lib/librte_vfio_user/vfio_user_base.h
@@ -11,6 +11,8 @

[dpdk-dev] [PATCH v2 4/9] vfio_user: implement DMA table and socket address API

2021-01-13 Thread Chenbo Xia
This patch introduces an API called rte_vfio_user_get_mem_table()
for emulated devices to acquire DMA memory table from vfio-user
library.

Notify operations are also introduced to notify the emulated
devices of several events. Another socket address API is introduced
for translation between device ID and socket address in notify
callbacks.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/rte_vfio_user.h|  77 -
 lib/librte_vfio_user/version.map|   2 +
 lib/librte_vfio_user/vfio_user_base.h   |   2 +
 lib/librte_vfio_user/vfio_user_server.c | 375 +++-
 lib/librte_vfio_user/vfio_user_server.h |   3 +
 5 files changed, 451 insertions(+), 8 deletions(-)

diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
index 117e994cc6..f575017bdf 100644
--- a/lib/librte_vfio_user/rte_vfio_user.h
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -5,10 +5,52 @@
 #ifndef _RTE_VFIO_USER_H
 #define _RTE_VFIO_USER_H
 
+#include 
+#include 
+#include 
 #include 
+#include 
 
 #include 
 
+#define RTE_VUSER_MAX_DMA 256
+
+struct rte_vfio_user_notify_ops {
+   /* Add device */
+   int (*new_device)(int dev_id);
+   /* Remove device */
+   void (*destroy_device)(int dev_id);
+   /* Update device status */
+   int (*update_status)(int dev_id);
+   /* Lock or unlock data path */
+   int (*lock_dp)(int dev_id, int lock);
+   /* Reset device */
+   int (*reset_device)(int dev_id);
+};
+
+struct rte_vfio_user_mem_reg {
+   uint64_t gpa;
+   uint64_t size;
+   uint64_t fd_offset;
+   uint32_t protection;/* attributes in  */
+#define RTE_VUSER_MEM_MAPPABLE (0x1 << 0)
+   uint32_t flags;
+};
+
+struct rte_vfio_user_mtb_entry {
+   uint64_t gpa;
+   uint64_t size;
+   uint64_t host_user_addr;
+   void *mmap_addr;
+   uint64_t mmap_size;
+   int fd;
+};
+
+struct rte_vfio_user_mem {
+   uint32_t entry_num;
+   struct rte_vfio_user_mtb_entry entry[RTE_VUSER_MAX_DMA];
+};
+
 struct rte_vfio_user_reg_info;
 
 typedef ssize_t (*rte_vfio_user_reg_acc_t)(struct rte_vfio_user_reg_info *reg,
@@ -32,6 +74,8 @@ struct rte_vfio_user_regions {
  * *rte_vfio_user_register
  * *rte_vfio_user_unregister
  * *rte_vfio_user_start
+ * *rte_vfio_get_sock_addr
+ * *rte_vfio_user_get_mem_table
  * *rte_vfio_user_set_dev_info
  * *rte_vfio_user_set_reg_info
  */
@@ -41,12 +85,15 @@ struct rte_vfio_user_regions {
  *
  * @param sock_addr
  *   Unix domain socket address
+ * @param ops
+ *   Notify ops for the device
  * @return
  *   0 on success, -1 on failure
  */
 __rte_experimental
 int
-rte_vfio_user_register(const char *sock_addr);
+rte_vfio_user_register(const char *sock_addr,
+   const struct rte_vfio_user_notify_ops *ops);
 
 /**
  * Unregister a vfio-user device.
@@ -73,6 +120,18 @@ __rte_experimental
 int
 rte_vfio_user_start(const char *sock_addr);
 
+/**
+ * Get the memory table of a vfio-user device.
+ *
+ * @param dev_id
+ *   Vfio-user device ID
+ * @return
+ *   Pointer to memory table on success, NULL on failure
+ */
+__rte_experimental
+const struct rte_vfio_user_mem *
+rte_vfio_user_get_mem_table(int dev_id);
+
 /**
  * Set the device information for a vfio-user device.
  *
@@ -113,4 +172,20 @@ int
 rte_vfio_user_set_reg_info(const char *sock_addr,
struct rte_vfio_user_regions *reg);
 
+/**
+ * Get the socket address for a vfio-user device.
+ *
+ * @param dev_id
+ *   Vfio-user device ID
+ * @param[out] buf
+ *   Buffer to store socket address
+ * @param len
+ *   The length of the buffer
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vfio_get_sock_addr(int dev_id, char *buf, size_t len);
+
 #endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index 0f4f5acba5..3a50b5ef0e 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -4,6 +4,8 @@ EXPERIMENTAL {
rte_vfio_user_register;
rte_vfio_user_unregister;
rte_vfio_user_start;
+   rte_vfio_get_sock_addr;
+   rte_vfio_user_get_mem_table;
rte_vfio_user_set_dev_info;
rte_vfio_user_set_reg_info;
 
diff --git a/lib/librte_vfio_user/vfio_user_base.h 
b/lib/librte_vfio_user/vfio_user_base.h
index f92886b56a..dd13170298 100644
--- a/lib/librte_vfio_user/vfio_user_base.h
+++ b/lib/librte_vfio_user/vfio_user_base.h
@@ -9,6 +9,7 @@
 
 #include "rte_vfio_user.h"
 
+#define VFIO_USER_MSG_MAX_NREG 8
 #define VFIO_USER_VERSION_MAJOR 1
 #define VFIO_USER_VERSION_MINOR 0
 #define VFIO_USER_MAX_RSVD 512
@@ -79,6 +80,7 @@ struct vfio_user_msg {
uint32_t err;   /* Valid in reply, optional */
union {
struct vfio_user_version ver;
+   struct rte_vfio_user_mem_reg memory[VFIO_USER_MSG_MAX_NREG];
struct vfio_device_info dev_info;
  

[dpdk-dev] [PATCH v2 5/9] vfio_user: implement interrupt related APIs

2021-01-13 Thread Chenbo Xia
This patch implements two interrupt related APIs, which are
rte_vfio_user_get_irq() and rte_vfio_user_set_irq_info().
The former is for devices to get interrupt configuration
(e.g., irqfds). The latter is for setting interrupt information
before vfio-user starts.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/rte_vfio_user.h|  46 
 lib/librte_vfio_user/version.map|   2 +
 lib/librte_vfio_user/vfio_user_base.h   |   8 +
 lib/librte_vfio_user/vfio_user_server.c | 300 +++-
 lib/librte_vfio_user/vfio_user_server.h |   6 +
 5 files changed, 357 insertions(+), 5 deletions(-)

diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
index f575017bdf..472ca15529 100644
--- a/lib/librte_vfio_user/rte_vfio_user.h
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -69,6 +69,11 @@ struct rte_vfio_user_regions {
struct rte_vfio_user_reg_info reg_info[];
 };
 
+struct rte_vfio_user_irq_info {
+   uint32_t irq_num;
+   struct vfio_irq_info irq_info[];
+};
+
 /**
  *  Below APIs are for vfio-user server (device provider) to use:
  * *rte_vfio_user_register
@@ -76,8 +81,10 @@ struct rte_vfio_user_regions {
  * *rte_vfio_user_start
  * *rte_vfio_get_sock_addr
  * *rte_vfio_user_get_mem_table
+ * *rte_vfio_user_get_irq
  * *rte_vfio_user_set_dev_info
  * *rte_vfio_user_set_reg_info
+ * *rte_vfio_user_set_irq_info
  */
 
 /**
@@ -188,4 +195,43 @@ __rte_experimental
 int
 rte_vfio_get_sock_addr(int dev_id, char *buf, size_t len);
 
+/**
+ * Get the irqfds of a vfio-user device.
+ *
+ * @param dev_id
+ *   Vfio-user device ID
+ * @param index
+ *   irq index
+ * @param count
+ *   irq count
+ * @param[out] fds
+ *   Pointer to the irqfds
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vfio_user_get_irq(int dev_id, uint32_t index, uint32_t count,
+   int *fds);
+
+/**
+ * Set the irq information for a vfio-user device.
+ *
+ * This information must be set before calling rte_vfio_user_start, and should
+ * not be updated after start. Update after start can be done by unregistration
+ * and re-registration, and then the device-level change can be detected by
+ * vfio-user client.
+ *
+ * @param sock_addr
+ *   Unix domain socket address
+ * @param irq
+ *   IRQ information for the vfio-user device
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vfio_user_set_irq_info(const char *sock_addr,
+   struct rte_vfio_user_irq_info *irq);
+
 #endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index 3a50b5ef0e..621a51a9fc 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -6,8 +6,10 @@ EXPERIMENTAL {
rte_vfio_user_start;
rte_vfio_get_sock_addr;
rte_vfio_user_get_mem_table;
+   rte_vfio_user_get_irq;
rte_vfio_user_set_dev_info;
rte_vfio_user_set_reg_info;
+   rte_vfio_user_set_irq_info;
 
local: *;
 };
diff --git a/lib/librte_vfio_user/vfio_user_base.h 
b/lib/librte_vfio_user/vfio_user_base.h
index dd13170298..1780db4322 100644
--- a/lib/librte_vfio_user/vfio_user_base.h
+++ b/lib/librte_vfio_user/vfio_user_base.h
@@ -61,6 +61,12 @@ struct vfio_user_reg {
uint8_t rsvd[VFIO_USER_MAX_RSVD];
 };
 
+struct vfio_user_irq_set {
+   struct vfio_irq_set set;
+   /* Reserved for data of irq set */
+   uint8_t rsvd[VFIO_USER_MAX_RSVD];
+};
+
 struct vfio_user_reg_rw {
uint64_t reg_offset;
uint32_t reg_idx;
@@ -83,6 +89,8 @@ struct vfio_user_msg {
struct rte_vfio_user_mem_reg memory[VFIO_USER_MSG_MAX_NREG];
struct vfio_device_info dev_info;
struct vfio_user_reg reg_info;
+   struct vfio_irq_info irq_info;
+   struct vfio_user_irq_set irq_set;
struct vfio_user_reg_rw reg_rw;
} payload;
int fds[VFIO_USER_MAX_FD];
diff --git a/lib/librte_vfio_user/vfio_user_server.c 
b/lib/librte_vfio_user/vfio_user_server.c
index 9e98b4ec81..104a0abb77 100644
--- a/lib/librte_vfio_user/vfio_user_server.c
+++ b/lib/librte_vfio_user/vfio_user_server.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "vfio_user_server.h"
 
@@ -310,6 +311,150 @@ vfio_user_device_get_reg_info(struct vfio_user_server 
*dev,
return 0;
 }
 
+static int
+vfio_user_device_get_irq_info(struct vfio_user_server *dev,
+   struct vfio_user_msg *msg)
+{
+   struct vfio_irq_info *irq_info = &msg->payload.irq_info;
+   struct rte_vfio_user_irq_info *info = dev->irqs.info;
+   uint32_t i;
+
+   if (vfio_user_check_msg_fdnum(msg, 0) != 0)
+   return -EINVAL;
+
+   for (i = 0; i < info->irq_num; i++) {
+   if (irq_info->index == info->irq_info[i].index) {
+   irq_info-

[dpdk-dev] [PATCH v2 6/9] vfio_user: add client APIs of device attach/detach

2021-01-13 Thread Chenbo Xia
This patch implements two APIs, rte_vfio_user_attach_dev() and
rte_vfio_user_detach_dev(), for a vfio-user client to connect to
or disconnect from a vfio-user device on the server side.

Signed-off-by: Chenbo Xia 
Signed-off-by: Xiuchun Lu 
---
 lib/librte_vfio_user/meson.build|   3 +-
 lib/librte_vfio_user/rte_vfio_user.h|  32 +++
 lib/librte_vfio_user/version.map|   2 +
 lib/librte_vfio_user/vfio_user_client.c | 281 
 lib/librte_vfio_user/vfio_user_client.h |  26 +++
 5 files changed, 343 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_vfio_user/vfio_user_client.c
 create mode 100644 lib/librte_vfio_user/vfio_user_client.h

diff --git a/lib/librte_vfio_user/meson.build b/lib/librte_vfio_user/meson.build
index b7363f61c6..5761f0edd1 100644
--- a/lib/librte_vfio_user/meson.build
+++ b/lib/librte_vfio_user/meson.build
@@ -6,5 +6,6 @@ if not is_linux
reason = 'only supported on Linux'
 endif
 
-sources = files('vfio_user_base.c', 'vfio_user_server.c')
+sources = files('vfio_user_base.c', 'vfio_user_server.c',
+   'vfio_user_client.c')
 headers = files('rte_vfio_user.h')
diff --git a/lib/librte_vfio_user/rte_vfio_user.h 
b/lib/librte_vfio_user/rte_vfio_user.h
index 472ca15529..adafa552e2 100644
--- a/lib/librte_vfio_user/rte_vfio_user.h
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -234,4 +234,36 @@ int
 rte_vfio_user_set_irq_info(const char *sock_addr,
struct rte_vfio_user_irq_info *irq);
 
+/**
+ *  Below APIs are for vfio-user client (device consumer) to use:
+ * *rte_vfio_user_attach_dev
+ * *rte_vfio_user_detach_dev
+ */
+
+/**
+ * Attach to a vfio-user device.
+ *
+ * @param sock_addr
+ *   Unix domain socket address of the vfio-user device to attach to
+ * @return
+ *   - >=0: Success, device attached. Returned value is the device ID,
+ *     which is the dev_id expected by rte_vfio_user_detach_dev().
+ *   - <0: Failure on device attach
+ */
+__rte_experimental
+int
+rte_vfio_user_attach_dev(const char *sock_addr);
+
+/**
+ * Detach from a vfio-user device.
+ *
+ * @param dev_id
+ *   Device ID of the vfio-user device, i.e. the non-negative value
+ *   returned by rte_vfio_user_attach_dev()
+ * @return
+ *   - 0: Success, device detached
+ *   - <0: Failure on device detach
+ */
+__rte_experimental
+int
+rte_vfio_user_detach_dev(int dev_id);
+
 #endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index 621a51a9fc..a0cda2b49c 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -10,6 +10,8 @@ EXPERIMENTAL {
rte_vfio_user_set_dev_info;
rte_vfio_user_set_reg_info;
rte_vfio_user_set_irq_info;
+   rte_vfio_user_attach_dev;
+   rte_vfio_user_detach_dev;
 
local: *;
 };
diff --git a/lib/librte_vfio_user/vfio_user_client.c 
b/lib/librte_vfio_user/vfio_user_client.c
new file mode 100644
index 00..f288cf70f5
--- /dev/null
+++ b/lib/librte_vfio_user/vfio_user_client.c
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vfio_user_client.h"
+#include "rte_vfio_user.h"
+
+#define REPLY_USEC 1000
+#define RECV_MAX_TRY 50
+
+static struct vfio_user_client_devs vfio_client_devs = {
+   .cl_num = 0, /* client count; starts with no client */
+   .mutex = PTHREAD_MUTEX_INITIALIZER, /* locked in vfio_user_client_create_dev() */
+};
+
+/*
+ * Find a free slot in vfio_client_devs.cl[] for a new client.
+ *
+ * Every existing client is compared against sock_addr so that the same
+ * socket address cannot be used by two clients.
+ *
+ * Returns the index of the first free slot, or -1 if sock_addr is NULL,
+ * sock_addr is already used by an existing client, or no slot is free.
+ */
+static int
+vfio_user_client_allocate(const char *sock_addr)
+{
+	uint32_t i, count = 0;
+	int index = -1;
+
+	if (sock_addr == NULL)
+		return -1;
+
+	if (vfio_client_devs.cl_num == 0)
+		return 0;
+
+	for (i = 0; i < MAX_VFIO_USER_CLIENT; i++) {
+		struct vfio_user_client *cl = vfio_client_devs.cl[i];
+
+		if (cl == NULL) {
+			if (index == -1)
+				index = (int)i;
+			/* Free slot known and every client already checked */
+			if (count == vfio_client_devs.cl_num)
+				break;
+			continue;
+		}
+
+		if (strcmp(cl->sock.sock_addr, sock_addr) == 0)
+			return -1;
+
+		count++;
+		/*
+		 * Stop early only when a free slot is already known. When the
+		 * occupied slots come first (e.g. only slot 0 is in use), the
+		 * scan must go on to reach the first free slot; stopping here
+		 * would return -1 although free slots exist.
+		 */
+		if (count == vfio_client_devs.cl_num && index != -1)
+			break;
+	}
+
+	return index;
+}
+
+static struct vfio_user_client *
+vfio_user_client_create_dev(const char *sock_addr)
+{
+   struct vfio_user_client *cl;
+   struct vfio_user_socket *sock;
+   int fd, idx;
+   struct sockaddr_un un = { 0 };
+
+   pthread_mutex_lock(&vfio_client_devs.mutex);
+   if (vfio_client_devs.cl_num == MAX_VFIO_USER_CLIENT) {
+   VFIO_USER_LOG(ERR, "Failed to create client:"
+   " client num reaches max\n");
+   goto err;
+   }
+
+   idx = vfio_user_client_allocate(sock_addr);
+   if (idx < 0) {
+   VFIO_USER_LOG(ERR, "Failed to alloc a slot for client\n");
+   goto err;
+   }
+
+   cl = malloc(sizeof(*cl));
+   if (!cl) {
+

  1   2   >