This patch adds the vhost async dequeue data-path to the vhost sample.
The vswitch can leverage IOAT to accelerate the vhost async dequeue
data-path.

Signed-off-by: Wenwu Ma <wenwux...@intel.com>
---
 doc/guides/sample_app_ug/vhost.rst |   9 +-
 examples/vhost/ioat.c              |  61 ++++++++++---
 examples/vhost/ioat.h              |  25 ++++++
 examples/vhost/main.c              | 140 ++++++++++++++++++++---------
 4 files changed, 177 insertions(+), 58 deletions(-)

diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst
index 9afde9c7f5..63dcf181e1 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -169,9 +169,12 @@ demonstrates how to use the async vhost APIs. It's used in combination with dmas
 **--dmas**
 This parameter is used to specify the assigned DMA device of a vhost device.
 Async vhost-user net driver will be used if --dmas is set. For example
---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel 00:04.0 for vhost
-device 0 enqueue operation and use DMA channel 00:04.1 for vhost device 1
-enqueue operation.
+--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means use
+DMA channels 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue operations
+and DMA channels 00:04.1/00:04.3 for vhost device 1 enqueue/dequeue
+operations. Devices are indexed by socket file order: vhost device 0 is
+created through the first socket file, vhost device 1 through the second,
+and so on.
 
 Common Issues
 -------------
diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index bf4e033bdb..a305100b47 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -21,6 +21,8 @@ struct packet_tracker {
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
 
+int vid2socketid[MAX_VHOST_DEVICE];
+
 int
 open_ioat(const char *value)
 {
@@ -29,7 +31,7 @@ open_ioat(const char *value)
        char *addrs = input;
        char *ptrs[2];
        char *start, *end, *substr;
-       int64_t vid, vring_id;
+       int64_t socketid, vring_id;
        struct rte_ioat_rawdev_config config;
        struct rte_rawdev_info info = { .dev_private = &config };
        char name[32];
@@ -60,6 +62,8 @@ open_ioat(const char *value)
                goto out;
        }
        while (i < args_nr) {
+               char *txd, *rxd;
+               bool is_txd;
                char *arg_temp = dma_arg[i];
                uint8_t sub_nr;
                sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
@@ -68,27 +72,38 @@ open_ioat(const char *value)
                        goto out;
                }
 
-               start = strstr(ptrs[0], "txd");
-               if (start == NULL) {
+               int async_flag;
+               txd = strstr(ptrs[0], "txd");
+               rxd = strstr(ptrs[0], "rxd");
+               if (txd == NULL && rxd == NULL) {
                        ret = -1;
                        goto out;
+               } else if (txd) {
+                       is_txd = true;
+                       start = txd;
+                       async_flag = ASYNC_RX_VHOST;
+               } else {
+                       is_txd = false;
+                       start = rxd;
+                       async_flag = ASYNC_TX_VHOST;
                }
 
                start += 3;
-               vid = strtol(start, &end, 0);
+               socketid = strtol(start, &end, 0);
                if (end == start) {
                        ret = -1;
                        goto out;
                }
 
-               vring_id = 0 + VIRTIO_RXQ;
+               vring_id = is_txd ? VIRTIO_RXQ : VIRTIO_TXQ;
+
                if (rte_pci_addr_parse(ptrs[1],
-                               &(dma_info + vid)->dmas[vring_id].addr) < 0) {
+                       &(dma_info + socketid)->dmas[vring_id].addr) < 0) {
                        ret = -1;
                        goto out;
                }
 
-               rte_pci_device_name(&(dma_info + vid)->dmas[vring_id].addr,
+               rte_pci_device_name(&(dma_info + socketid)->dmas[vring_id].addr,
                                name, sizeof(name));
                dev_id = rte_rawdev_get_dev_id(name);
                if (dev_id == (uint16_t)(-ENODEV) ||
@@ -103,8 +118,9 @@ open_ioat(const char *value)
                        goto out;
                }
 
-               (dma_info + vid)->dmas[vring_id].dev_id = dev_id;
-               (dma_info + vid)->dmas[vring_id].is_valid = true;
+               (dma_info + socketid)->dmas[vring_id].dev_id = dev_id;
+               (dma_info + socketid)->dmas[vring_id].is_valid = true;
+               (dma_info + socketid)->async_flag |= async_flag;
                config.ring_size = IOAT_RING_SIZE;
                config.hdls_disable = true;
                if (rte_rawdev_configure(dev_id, &info, sizeof(config)) < 0) {
@@ -126,13 +142,16 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
                struct rte_vhost_async_status *opaque_data, uint16_t count)
 {
        uint32_t i_desc;
-       uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
        struct rte_vhost_iov_iter *src = NULL;
        struct rte_vhost_iov_iter *dst = NULL;
        unsigned long i_seg;
        unsigned short mask = MAX_ENQUEUED_SIZE - 1;
-       unsigned short write = cb_tracker[dev_id].next_write;
 
+       if (queue_id >= MAX_RING_COUNT)
+               return -1;
+
+       uint16_t dev_id = dma_bind[vid2socketid[vid]].dmas[queue_id].dev_id;
+       unsigned short write = cb_tracker[dev_id].next_write;
        if (!opaque_data) {
                for (i_desc = 0; i_desc < count; i_desc++) {
                        src = descs[i_desc].src;
@@ -170,16 +189,16 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
                struct rte_vhost_async_status *opaque_data,
                uint16_t max_packets)
 {
-       if (!opaque_data) {
+       if (!opaque_data && (queue_id < MAX_RING_COUNT)) {
                uintptr_t dump[255];
                int n_seg;
                unsigned short read, write;
                unsigned short nb_packet = 0;
                unsigned short mask = MAX_ENQUEUED_SIZE - 1;
                unsigned short i;
+               uint16_t dev_id;
 
-               uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2
-                               + VIRTIO_RXQ].dev_id;
+               dev_id = dma_bind[vid2socketid[vid]].dmas[queue_id].dev_id;
                n_seg = rte_ioat_completed_ops(dev_id, 255, NULL, NULL, dump, dump);
                if (n_seg < 0) {
                        RTE_LOG(ERR,
@@ -215,4 +234,18 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
        return -1;
 }
 
+uint32_t get_async_flag_by_vid(int vid)
+{
+       return dma_bind[vid2socketid[vid]].async_flag;
+}
+
+uint32_t get_async_flag_by_socketid(int socketid)
+{
+       return dma_bind[socketid].async_flag;
+}
+
+void init_vid2socketid_array(int vid, int socketid)
+{
+       vid2socketid[vid] = socketid;
+}
 #endif /* RTE_RAW_IOAT */
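
For reference, a minimal standalone sketch of the txdN/rxdN naming
convention that open_ioat() now parses (the sample does this inline, as
in the hunks above). The helper parse_dma_token() is hypothetical; the
ASYNC_* values and the txd/rxd-to-queue mapping mirror the patch.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ASYNC_RX_VHOST 1	/* txdN: DMA for socket N's enqueue (VIRTIO_RXQ) path */
#define ASYNC_TX_VHOST 2	/* rxdN: DMA for socket N's dequeue (VIRTIO_TXQ) path */

/* Hypothetical helper: classify one --dmas token such as "rxd1@00:04.3".
 * Returns the PCI-address part, or NULL on malformed input; *socketid and
 * *flag receive the socket-file index and the direction bit.
 */
static const char *
parse_dma_token(char *token, long *socketid, int *flag)
{
	char *at = strchr(token, '@');
	char *txd, *rxd, *start, *end;

	if (at == NULL)
		return NULL;
	*at = '\0';

	txd = strstr(token, "txd");
	rxd = strstr(token, "rxd");
	if (txd) {
		start = txd + 3;
		*flag = ASYNC_RX_VHOST;
	} else if (rxd) {
		start = rxd + 3;
		*flag = ASYNC_TX_VHOST;
	} else {
		return NULL;
	}

	*socketid = strtol(start, &end, 0);
	if (end == start)
		return NULL;
	return at + 1;
}

int
main(void)
{
	char token[] = "rxd1@00:04.3";
	long socketid;
	int flag;
	const char *addr = parse_dma_token(token, &socketid, &flag);

	if (addr != NULL)
		printf("socket %ld, flag %d, addr %s\n", socketid, flag, addr);
	return 0;
}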
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 1aa28ed6a3..51111d65af 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -12,6 +12,9 @@
 #define MAX_VHOST_DEVICE 1024
 #define IOAT_RING_SIZE 4096
 #define MAX_ENQUEUED_SIZE 4096
+#define MAX_RING_COUNT 2
+#define ASYNC_RX_VHOST 1
+#define ASYNC_TX_VHOST 2
 
 struct dma_info {
        struct rte_pci_addr addr;
@@ -20,6 +23,7 @@ struct dma_info {
 };
 
 struct dma_for_vhost {
+       int async_flag;
        struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
        uint16_t nr;
 };
@@ -36,6 +40,10 @@ uint32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
                struct rte_vhost_async_status *opaque_data,
                uint16_t max_packets);
+
+uint32_t get_async_flag_by_vid(int vid);
+uint32_t get_async_flag_by_socketid(int socketid);
+void init_vid2socketid_array(int vid, int socketid);
 #else
 static int open_ioat(const char *value __rte_unused)
 {
@@ -59,5 +67,22 @@ ioat_check_completed_copies_cb(int vid __rte_unused,
 {
        return -1;
 }
+
+static uint32_t
+get_async_flag_by_vid(int vid __rte_unused)
+{
+       return 0;
+}
+
+static uint32_t
+get_async_flag_by_socketid(int socketid __rte_unused)
+{
+       return 0;
+}
+
+static void
+init_vid2socketid_array(int vid __rte_unused, int socketid __rte_unused)
+{
+}
 #endif
 #endif /* _IOAT_H_ */
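
A note on the new flag bits above: a socket file that appears in both a
txd and an rxd entry accumulates both bits in async_flag, so the two
data paths stay independently switchable. A minimal sketch of the
semantics:

#include <stdio.h>

#define ASYNC_RX_VHOST 1
#define ASYNC_TX_VHOST 2

int
main(void)
{
	int async_flag = 0;

	async_flag |= ASYNC_RX_VHOST;	/* a txdN@... entry was given */
	async_flag |= ASYNC_TX_VHOST;	/* an rxdN@... entry was given */

	if (async_flag & ASYNC_RX_VHOST)
		printf("enqueue (VIRTIO_RXQ) uses the async channel\n");
	if (async_flag & ASYNC_TX_VHOST)
		printf("dequeue (VIRTIO_TXQ) uses the async channel\n");
	return 0;
}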
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index aebdc3a566..81d7e4cbd3 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -93,8 +93,6 @@ static int client_mode;
 
 static int builtin_net_driver;
 
-static int async_vhost_driver;
-
 static char *dma_type;
 
 /* Specify timeout (in useconds) between retries on RX. */
@@ -679,7 +677,6 @@ us_vhost_parse_args(int argc, char **argv)
                                us_vhost_usage(prgname);
                                return -1;
                        }
-                       async_vhost_driver = 1;
                        break;
 
                case OPT_CLIENT_NUM:
@@ -897,7 +894,7 @@ drain_vhost(struct vhost_dev *vdev)
                                __ATOMIC_SEQ_CST);
        }
 
-       if (!async_vhost_driver)
+       if ((get_async_flag_by_vid(vdev->vid) & ASYNC_RX_VHOST) == 0)
                free_pkts(m, nr_xmit);
 }
 
@@ -1237,10 +1234,19 @@ drain_eth_rx(struct vhost_dev *vdev)
                                __ATOMIC_SEQ_CST);
        }
 
-       if (!async_vhost_driver)
+       if ((get_async_flag_by_vid(vdev->vid) & ASYNC_RX_VHOST) == 0)
                free_pkts(pkts, rx_count);
 }
 
+uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+                               struct rte_mempool *mbuf_pool,
+                               struct rte_mbuf **pkts, uint16_t count)
+{
+       int nr_inflight;
+       return rte_vhost_async_try_dequeue_burst(dev->vid, queue_id,
+                       mbuf_pool, pkts, count, &nr_inflight);
+}
+
 uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
                        struct rte_mempool *mbuf_pool,
                        struct rte_mbuf **pkts, uint16_t count)
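
For context (not part of the diff), a sketch of how a worker can drain
the guest TX ring through the per-device dequeue op. It assumes the
sample's surroundings (vdev_queue_ops[], struct vhost_dev, free_pkts()
and MAX_PKT_BURST from main.c/main.h); drain_guest_tx() itself is
hypothetical.

static __rte_always_inline void
drain_guest_tx(struct vhost_dev *vdev, struct rte_mempool *mbuf_pool)
{
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	uint16_t count;

	/* Resolves to async_dequeue_pkts() or sync_dequeue_pkts()
	 * depending on the --dmas configuration for this device. */
	count = vdev_queue_ops[vdev->vid].dequeue_pkt_burst(vdev,
			VIRTIO_TXQ, mbuf_pool, pkts, MAX_PKT_BURST);

	/* Forward the packets here; on the async path only packets whose
	 * DMA copies have completed are returned. This sketch just drops
	 * them. */
	if (count)
		free_pkts(pkts, count);
}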
@@ -1392,12 +1398,90 @@ destroy_device(int vid)
                "(%d) device has been removed from data core\n",
                vdev->vid);
 
-       if (async_vhost_driver)
+       if (get_async_flag_by_vid(vid) & ASYNC_RX_VHOST)
                rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+       if (get_async_flag_by_vid(vid) & ASYNC_TX_VHOST)
+               rte_vhost_async_channel_unregister(vid, VIRTIO_TXQ);
 
        rte_free(vdev);
 }
 
+static int
+get_socketid_by_vid(int vid)
+{
+       int i;
+       char ifname[PATH_MAX];
+       rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+
+       for (i = 0; i < nb_sockets; i++) {
+               char *file = socket_files + i * PATH_MAX;
+               if (strcmp(file, ifname) == 0)
+                       return i;
+       }
+
+       return -1;
+}
+
+static int
+init_vhost_queue_ops(int vid)
+{
+       int socketid = get_socketid_by_vid(vid);
+       if (socketid == -1)
+               return -1;
+
+       init_vid2socketid_array(vid, socketid);
+       if (builtin_net_driver) {
+               vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts;
+               vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts;
+       } else {
+               if (get_async_flag_by_vid(vid) & ASYNC_RX_VHOST) {
+                       vdev_queue_ops[vid].enqueue_pkt_burst =
+                                               async_enqueue_pkts;
+               } else {
+                       vdev_queue_ops[vid].enqueue_pkt_burst =
+                                               sync_enqueue_pkts;
+               }
+
+               if (get_async_flag_by_vid(vid) & ASYNC_TX_VHOST) {
+                       vdev_queue_ops[vid].dequeue_pkt_burst =
+                                               async_dequeue_pkts;
+               } else {
+                       vdev_queue_ops[vid].dequeue_pkt_burst =
+                                               sync_dequeue_pkts;
+               }
+       }
+
+       return 0;
+}
+
+static int
+vhost_async_channel_register(int vid)
+{
+       int ret = 0;
+       struct rte_vhost_async_features f;
+       struct rte_vhost_async_channel_ops channel_ops;
+
+       if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) {
+               channel_ops.transfer_data = ioat_transfer_data_cb;
+               channel_ops.check_completed_copies =
+                       ioat_check_completed_copies_cb;
+
+               f.async_inorder = 1;
+               f.async_threshold = 256;
+
+               if (get_async_flag_by_vid(vid) & ASYNC_RX_VHOST) {
+                       ret |= rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+                                       f.intval, &channel_ops);
+               }
+               if (get_async_flag_by_vid(vid) & ASYNC_TX_VHOST) {
+                       ret |= rte_vhost_async_channel_register(vid, VIRTIO_TXQ,
+                                       f.intval, &channel_ops);
+               }
+       }
+
+       return ret;
+}
+
 /*
  * A new device is added to a data core. First the device is added to the main linked list
  * and then allocated to a specific data core.
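
The helpers above hinge on mapping a vid back to its socket-file index,
since --dmas entries are keyed by socket order while the vhost callbacks
only see a vid. A toy standalone sketch of that lookup (the socket paths
are made up; the sample matches rte_vhost_get_ifname() output against
socket_files the same way):

#include <stdio.h>
#include <string.h>

/* Stand-in for the sample's socket_files list, in command-line order. */
static const char *sockets[] = { "/tmp/vhost0.sock", "/tmp/vhost1.sock" };

static int
toy_get_socketid(const char *ifname)
{
	unsigned int i;

	for (i = 0; i < 2; i++)
		if (strcmp(sockets[i], ifname) == 0)
			return (int)i;
	return -1;
}

int
main(void)
{
	/* get_socketid_by_vid() obtains ifname via rte_vhost_get_ifname(). */
	printf("%d\n", toy_get_socketid("/tmp/vhost1.sock"));	/* prints 1 */
	return 0;
}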
@@ -1431,20 +1515,8 @@ new_device(int vid)
                }
        }
 
-       if (builtin_net_driver) {
-               vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts;
-               vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts;
-       } else {
-               if (async_vhost_driver) {
-                       vdev_queue_ops[vid].enqueue_pkt_burst =
-                                                       async_enqueue_pkts;
-               } else {
-                       vdev_queue_ops[vid].enqueue_pkt_burst =
-                                                       sync_enqueue_pkts;
-               }
-
-               vdev_queue_ops[vid].dequeue_pkt_burst = sync_dequeue_pkts;
-       }
+       if (init_vhost_queue_ops(vid) != 0)
+               return -1;
 
        if (builtin_net_driver)
                vs_vhost_net_setup(vdev);
@@ -1473,28 +1545,13 @@ new_device(int vid)
        rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
        rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);
 
+       int ret = vhost_async_channel_register(vid);
+
        RTE_LOG(INFO, VHOST_DATA,
                "(%d) device has been added to data core %d\n",
                vid, vdev->coreid);
 
-       if (async_vhost_driver) {
-               struct rte_vhost_async_features f;
-               struct rte_vhost_async_channel_ops channel_ops;
-
-               if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) {
-                       channel_ops.transfer_data = ioat_transfer_data_cb;
-                       channel_ops.check_completed_copies =
-                               ioat_check_completed_copies_cb;
-
-                       f.async_inorder = 1;
-                       f.async_threshold = 256;
-
-                       return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
-                               f.intval, &channel_ops);
-               }
-       }
-
-       return 0;
+       return ret;
 }
 
 /*
@@ -1735,10 +1792,11 @@ main(int argc, char *argv[])
        for (i = 0; i < nb_sockets; i++) {
                char *file = socket_files + i * PATH_MAX;
 
-               if (async_vhost_driver)
-                       flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+               uint64_t flag = flags;
+               if (get_async_flag_by_socketid(i) != 0)
+                       flag |= RTE_VHOST_USER_ASYNC_COPY;
 
-               ret = rte_vhost_driver_register(file, flags);
+               ret = rte_vhost_driver_register(file, flag);
                if (ret != 0) {
                        unregister_drivers(i);
                        rte_exit(EXIT_FAILURE,
-- 
2.25.1
