From: Elena Agostini <eagost...@nvidia.com> This patch introduces ethdev in test-gpudev app to provide: - an example to show how GPU memory can be used to send and receive packets - a useful tool to measure network metrics when using GPU memory with IO forwarding
With this feature test-gpudev can: - RX packets in CPU or GPU memory - Store packets in the gpudev communication list - TX receive packets from the communication list It's a simulation of a multi-core application. Signed-off-by: Elena Agostini <eagost...@nvidia.com> --- app/test-gpudev/main.c | 477 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 458 insertions(+), 19 deletions(-) diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c index 250fba6427..18de023208 100644 --- a/app/test-gpudev/main.c +++ b/app/test-gpudev/main.c @@ -10,6 +10,8 @@ #include <stdarg.h> #include <errno.h> #include <getopt.h> +#include <stdbool.h> +#include <signal.h> #include <rte_common.h> #include <rte_malloc.h> @@ -19,22 +21,90 @@ #include <rte_ethdev.h> #include <rte_mempool.h> #include <rte_mbuf.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> #include <rte_gpudev.h> +#define GPU_PAGE_SHIFT 16 +#define GPU_PAGE_SIZE (1UL << GPU_PAGE_SHIFT) +#define GPU_PAGE_OFFSET (GPU_PAGE_SIZE-1) +#define GPU_PAGE_MASK (~GPU_PAGE_OFFSET) + +#define MAX_QUEUES 16 +#define NUM_COMM_ITEMS 2048 +#define PKT_GAP 4 + +// #define DEBUG_PRINT 1 + enum app_args { ARG_HELP, - ARG_MEMPOOL + ARG_BURST, + ARG_GPU, + ARG_MBUFD, + ARG_MEMORY, + ARG_QUEUES, + ARG_TESTAPI, +}; + +enum mem_type { + MEMORY_CPU, + MEMORY_GPU +}; + +/* Options configurable from cmd line */ +static uint32_t conf_burst = 64; +static uint16_t conf_gpu_id = 0; +static enum mem_type conf_mtype = MEMORY_CPU; +static uint32_t conf_mbuf_dataroom = 2048; +static uint32_t conf_queues = 1; +static bool conf_testapi = false; +static uint16_t conf_nb_descriptors = 2048; + +/* Options statically defined */ +static uint32_t conf_nb_mbuf = 16384; +static uint16_t conf_port_id = 0; + +/* Other variables */ +static volatile bool force_quit; +static struct rte_mempool *mpool; +static struct rte_pktmbuf_extmem ext_mem; +struct rte_gpu_comm_list *comm_list_fwd[MAX_QUEUES]; +struct rte_ether_addr port_eth_addr; 
+static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .split_hdr_size = 0, + .offloads = 0, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + .offloads = 0, + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP + }, + }, }; static void usage(const char *prog_name) { - printf("%s [EAL options] --\n", + printf("%s [EAL options] --\n" + " --help\n" + " --burst N: number of packets per rx burst\n" + " --gpu N: GPU ID to use\n" + " --memory N: external mempool memory type, 0 CPU, 1 GPU\n" + " --mbufd N: mbuf dataroom size\n" + " --testapi: test gpudev function\n" + " --queues N: number of RX queues\n", prog_name); } -static void +static int args_parse(int argc, char **argv) { char **argvopt; @@ -42,7 +112,19 @@ args_parse(int argc, char **argv) int opt_idx; static struct option lgopts[] = { - { "help", 0, 0, ARG_HELP}, + { "help", 0, 0, ARG_HELP}, + /* Packets per burst. */ + { "burst", 1, 0, ARG_BURST}, + /* GPU to use. */ + { "gpu", 1, 0, ARG_GPU}, + /* Type of memory for the mempool. */ + { "memory", 1, 0, ARG_MEMORY}, + /* Size of mbufs dataroom */ + { "mbufd", 1, 0, ARG_MBUFD}, + /* Number of RX queues */ + { "queues", 1, 0, ARG_QUEUES}, + /* Test only gpudev functions */ + { "testapi", 0, 0, ARG_TESTAPI}, /* End of options */ { 0, 0, 0, 0 } }; @@ -51,6 +133,24 @@ args_parse(int argc, char **argv) while ((opt = getopt_long(argc, argvopt, "", lgopts, &opt_idx)) != EOF) { switch (opt) { + case ARG_BURST: + conf_burst = (uint32_t) atoi(optarg); + break; + case ARG_GPU: + conf_gpu_id = (uint16_t) atoi(optarg); + break; + case ARG_MEMORY: + conf_mtype = (atoi(optarg) == 1 ? MEMORY_GPU : MEMORY_CPU); + break; + case ARG_MBUFD: + conf_mbuf_dataroom = (uint32_t) atoi(optarg); + break; + case ARG_QUEUES: + conf_queues = (uint32_t) atoi(optarg); + break; + case ARG_TESTAPI: + conf_testapi = (atoi(optarg) == 1 ? 
true : false); + break; case ARG_HELP: usage(argv[0]); break; @@ -60,6 +160,19 @@ args_parse(int argc, char **argv) break; } } + + if (conf_queues > MAX_QUEUES) { + fprintf(stderr, "Can't support more than %d queues\n", MAX_QUEUES); + return -1; + } + + if (conf_queues * 2 > rte_lcore_count()) { + fprintf(stderr, "Need to use at least %d cores to support %d RX/TX queues (EAL cores %d)\n", + conf_queues * 2, conf_queues, rte_lcore_count()); + return -1; + } + + return 0; } static int @@ -342,13 +455,130 @@ create_update_comm_list(uint16_t gpu_id) return -1; } +static void +signal_handler(int signum) +{ + if (signum == SIGINT || signum == SIGTERM) { + printf("\n\nSignal %d received, preparing to exit...\n", + signum); + force_quit = true; + } +} + +static int +rx_core(__rte_unused void *arg) +{ + uint32_t queue_id; + uint32_t nb_rx = 0; + int ret = 0; + int comm_list_item = 0; + struct rte_mbuf *rx_mbufs[RTE_GPU_COMM_LIST_PKTS_MAX]; + + queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2; + + if (queue_id > conf_queues) { + fprintf(stderr, "Please specify the right list of cores (%d cores) in EAL params to support %d queues.\n", + conf_queues*2, conf_queues); + RTE_GPU_VOLATILE(force_quit) = true; + return -1; + } + + printf("RX core started on queue %d.\n", queue_id); + + while (force_quit == false) { + + nb_rx = 0; + while (nb_rx < RTE_GPU_COMM_LIST_PKTS_MAX && + nb_rx < (conf_burst - PKT_GAP) && + force_quit == false) { + nb_rx += rte_eth_rx_burst(conf_port_id, queue_id, + &(rx_mbufs[nb_rx]), + (conf_burst - nb_rx)); + } + + ret = rte_gpu_comm_populate_list_pkts( + &(comm_list_fwd[queue_id][comm_list_item]), rx_mbufs, nb_rx); + if (ret) { + fprintf(stderr, "rte_gpu_comm_populate_list_pkts error %d.\n", ret); + return -1; + } + +#ifdef DEBUG_PRINT + printf("RX %d pkts from item %d\n", + comm_list_fwd[queue_id][comm_list_item].num_pkts, + comm_list_item); +#endif + + RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) = RTE_GPU_COMM_LIST_DONE; + + 
comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS; + } + + return 0; +} + +static int +tx_core(__rte_unused void *arg) +{ + uint32_t queue_id = 0; + uint32_t nb_tx = 0; + int ret = 0; + int comm_list_item = 0; + + queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2; + if (queue_id > conf_queues) { + fprintf(stderr, "Please specify the right list of cores (%d cores) in EAL params to support %d queues.\n", + conf_queues*2, conf_queues); + RTE_GPU_VOLATILE(force_quit) = true; + return -1; + } + printf("TX core started on queue %d.\n", queue_id); + + while (force_quit == false) { + +#ifdef DEBUG_PRINT + printf("Waiting on item %d\n", comm_list_item); +#endif + while (RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) != + RTE_GPU_COMM_LIST_DONE && force_quit == false); + + nb_tx = 0; + while (nb_tx < comm_list_fwd[queue_id][comm_list_item].num_pkts) { + nb_tx += rte_eth_tx_burst(conf_port_id, queue_id, + &(comm_list_fwd[queue_id][comm_list_item].mbufs[nb_tx]), + comm_list_fwd[queue_id][comm_list_item].num_pkts - nb_tx); + } + rte_wmb(); + +#ifdef DEBUG_PRINT + printf("TX %d/%d pkts from item %d\n", + nb_tx, comm_list_fwd[queue_id][comm_list_item].num_pkts, + comm_list_item); +#endif + ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[queue_id][comm_list_item])); + if (ret) { + fprintf(stderr, "rte_gpu_comm_cleanup_list error %d.\n", ret); + return -1; + } + + rte_mb(); + + comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS; + } + + return 0; +} + int main(int argc, char **argv) { - int ret; + int ret, core_id; int nb_gpus = 0; + int nb_ports = 0; int16_t gpu_id = 0; + uint32_t idx_q = 0; struct rte_gpu_info ginfo; + struct rte_eth_dev_info dev_info; /* Init EAL. 
*/ ret = rte_eal_init(argc, argv); @@ -356,8 +586,14 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "EAL init failed\n"); argc -= ret; argv += ret; - if (argc > 1) - args_parse(argc, argv); + if (argc > 1) { + ret = args_parse(argc, argv); + if (ret) { + fprintf(stderr, "Input args error.\n"); + goto exit; + } + } + argc -= ret; argv += ret; @@ -381,25 +617,228 @@ main(int argc, char **argv) if (nb_gpus == 0) { fprintf(stderr, "Need at least one GPU on the system to run the example\n"); - return EXIT_FAILURE; + goto exit; } - gpu_id = 0; + if (nb_gpus < conf_gpu_id) { + fprintf(stderr, "Not enough GPUs in the system (%d / %d).\n", nb_gpus, conf_gpu_id); + goto exit; + } - /** - * Memory tests - */ - alloc_gpu_memory(gpu_id); - register_cpu_memory(gpu_id); + if (conf_testapi == true) { + /* Memory tests */ + alloc_gpu_memory(gpu_id); + register_cpu_memory(gpu_id); - /** - * Communication items test - */ - create_update_comm_flag(gpu_id); - create_update_comm_list(gpu_id); + /* Communication items test */ + create_update_comm_flag(gpu_id); + create_update_comm_list(gpu_id); + + goto exit; + } + + force_quit = false; + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + nb_ports = rte_eth_dev_count_avail(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + + ret = rte_eth_dev_info_get(conf_port_id, &dev_info); + if (ret) { + fprintf(stderr, "rte_eth_dev_info_get failed with %d.\n", ret); + goto exit; + } + + /* Create external memory mempool. 
*/ + ext_mem.elt_size = conf_mbuf_dataroom + RTE_PKTMBUF_HEADROOM; + ext_mem.buf_len = RTE_ALIGN_CEIL(conf_nb_mbuf * ext_mem.elt_size, GPU_PAGE_SIZE); + + if (conf_mtype == MEMORY_CPU) { + ext_mem.buf_ptr = rte_malloc("extmem", ext_mem.buf_len, 0); + if (ext_mem.buf_ptr == NULL) { + fprintf(stderr, "Could not allocate CPU DPDK memory.\n"); + goto exit; + } + + ret = rte_gpu_mem_register(conf_gpu_id, ext_mem.buf_len, ext_mem.buf_ptr); + if (ret < 0) { + fprintf(stderr, + "rte_gpu_mem_register CPU memory returned error %d.\n", ret); + return -1; + } + } else { + ext_mem.buf_iova = RTE_BAD_IOVA; + + ext_mem.buf_ptr = rte_gpu_mem_alloc(conf_gpu_id, ext_mem.buf_len); + if (ext_mem.buf_ptr == NULL) { + fprintf(stderr, "Could not allocate GPU device memory.\n"); + goto exit; + } + + ret = rte_extmem_register(ext_mem.buf_ptr, ext_mem.buf_len, + NULL, ext_mem.buf_iova, GPU_PAGE_SIZE); + if (ret) { + fprintf(stderr, "Unable to register addr 0x%p, ret %d.\n", ext_mem.buf_ptr, ret); + goto exit; + } + } + + /* DMA map the external memory. */ + ret = rte_dev_dma_map(dev_info.device, ext_mem.buf_ptr, + ext_mem.buf_iova, ext_mem.buf_len); + if (ret) { + fprintf(stderr, "Could not DMA map EXT memory.\n"); + goto exit; + } + + /* Create external memory mempool. */ + mpool = rte_pktmbuf_pool_create_extbuf("payload_mpool", conf_nb_mbuf, + 0, 0, ext_mem.elt_size, + rte_socket_id(), &ext_mem, 1); + if (mpool == NULL) { + fprintf(stderr, "Could not create EXT memory mempool.\n"); + goto exit; + } + + /* Queues configuration. 
*/ + ret = rte_eth_dev_configure(conf_port_id, conf_queues, + conf_queues, &port_conf); + if (ret < 0) { + fprintf(stderr, + "Cannot configure device: err=%d, port=%u queues=%u\n", + ret, conf_port_id, conf_queues); + goto exit; + } + + ret = rte_eth_dev_adjust_nb_rx_tx_desc(conf_port_id, + &conf_nb_descriptors, &conf_nb_descriptors); + if (ret) { + fprintf(stderr, + "Cannot adjust number of descriptors: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + for (idx_q = 0; idx_q < conf_queues; idx_q++) { + + ret = rte_eth_rx_queue_setup(conf_port_id, idx_q, + conf_nb_descriptors, rte_lcore_to_socket_id(idx_q), + NULL, mpool); + + if (ret) { + fprintf(stderr, "rte_eth_rx_queue_setup: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + ret = rte_eth_tx_queue_setup(conf_port_id, idx_q, + conf_nb_descriptors, rte_lcore_to_socket_id(idx_q), NULL); + if (ret) { + fprintf(stderr, "rte_eth_tx_queue_setup: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + } + + rte_eth_macaddr_get(conf_port_id, &port_eth_addr); + + ret = rte_eth_dev_start(conf_port_id); + if (ret) { + fprintf(stderr, "rte_eth_dev_start: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + printf("Port %d: %02x:%02x:%02x:%02x:%02x:%02x started!\n", + conf_port_id, + (uint8_t)port_eth_addr.addr_bytes[0], + (uint8_t)port_eth_addr.addr_bytes[1], + port_eth_addr.addr_bytes[2], + port_eth_addr.addr_bytes[3], + port_eth_addr.addr_bytes[4], + port_eth_addr.addr_bytes[5]); + + rte_eth_promiscuous_enable(conf_port_id); + + /* Create communication lists, one per queue. 
*/ + for (idx_q = 0; idx_q < MAX_QUEUES; idx_q++) { + comm_list_fwd[idx_q] = NULL; + + if (idx_q < conf_queues) { + comm_list_fwd[idx_q] = rte_gpu_comm_create_list(conf_gpu_id, + NUM_COMM_ITEMS); + if (comm_list_fwd[idx_q] == NULL) { + fprintf(stderr, "comm_create_list returned error %d\n", + ret); + goto exit; + } + ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[idx_q][0])); + if (ret < 0) { + fprintf(stderr, "comm_cleanup_list returned error %d\n", + ret); + goto exit; + } + } + } + + core_id = 0; + for (idx_q = 0; idx_q < conf_queues; idx_q++) { + core_id = rte_get_next_lcore(core_id, 1, 0); + rte_eal_remote_launch(tx_core, NULL, core_id); + + core_id = rte_get_next_lcore(core_id, 1, 0); + rte_eal_remote_launch(rx_core, NULL, core_id); + } + + core_id = 0; + RTE_LCORE_FOREACH_WORKER(core_id) { + if (rte_eal_wait_lcore(core_id) < 0) { + fprintf(stderr, "bad exit for core %d.\n", + core_id); + break; + } + } + + force_quit = true; + + ret = rte_dev_dma_unmap(dev_info.device, (void *)ext_mem.buf_ptr, + RTE_BAD_IOVA, ext_mem.buf_len); + if (ret) { + fprintf(stderr, + "rte_dev_dma_unmap 0x%p -> %d (rte_errno = %d)\n", + (uint8_t *)ext_mem.buf_ptr, ret, rte_errno); + goto exit; + } + + if (conf_mtype == MEMORY_CPU) { + ret = rte_gpu_mem_unregister(conf_gpu_id, ext_mem.buf_ptr); + if (ret < 0) { + fprintf(stderr, "rte_gpu_mem_unregister returned error %d\n", ret); + goto exit; + } + + rte_free(ext_mem.buf_ptr); + + } else { + + ret = rte_extmem_unregister(ext_mem.buf_ptr, ext_mem.buf_len); + if (ret) { + fprintf(stderr, "rte_extmem_unregister failed with %d.\n", ret); + goto exit; + } + + rte_gpu_mem_free(conf_gpu_id, (void *)ext_mem.buf_ptr); + } + + rte_eth_dev_stop(conf_port_id); + rte_eth_dev_close(conf_port_id); +exit: /* clean up the EAL */ rte_eal_cleanup(); + printf("Bye...\n"); return EXIT_SUCCESS; } -- 2.17.1