From: Elena Agostini <eagost...@nvidia.com> This patch introduces ethdev in test-gpudev app to provide: - an example to show how GPU memory can be used to send and receive packets - a useful tool to measure network metrics when using GPU memory with IO forwarding
With this feature test-gpudev can: - RX packets in CPU or GPU memory - Store packets in the gpudev communication list - TX receive packets from the communication list It's a simulation of a multi-core application. Signed-off-by: Elena Agostini <eagost...@nvidia.com> --- app/test-gpudev/main.c | 477 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 458 insertions(+), 19 deletions(-) diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c index 250fba6427..18de023208 100644 --- a/app/test-gpudev/main.c +++ b/app/test-gpudev/main.c @@ -10,6 +10,8 @@ #include <stdarg.h> #include <errno.h> #include <getopt.h> +#include <stdbool.h> +#include <signal.h> #include <rte_common.h> #include <rte_malloc.h> @@ -19,22 +21,90 @@ #include <rte_ethdev.h> #include <rte_mempool.h> #include <rte_mbuf.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> #include <rte_gpudev.h> +#define GPU_PAGE_SHIFT 16 +#define GPU_PAGE_SIZE (1UL << GPU_PAGE_SHIFT) +#define GPU_PAGE_OFFSET (GPU_PAGE_SIZE-1) +#define GPU_PAGE_MASK (~GPU_PAGE_OFFSET) + +#define MAX_QUEUES 16 +#define NUM_COMM_ITEMS 2048 +#define PKT_GAP 4 + +// #define DEBUG_PRINT 1 + enum app_args { ARG_HELP, - ARG_MEMPOOL + ARG_BURST, + ARG_GPU, + ARG_MBUFD, + ARG_MEMORY, + ARG_QUEUES, + ARG_TESTAPI, +}; + +enum mem_type { + MEMORY_CPU, + MEMORY_GPU +}; + +/* Options configurable from cmd line */ +static uint32_t conf_burst = 64; +static uint16_t conf_gpu_id = 0; +static enum mem_type conf_mtype = MEMORY_CPU; +static uint32_t conf_mbuf_dataroom = 2048; +static uint32_t conf_queues = 1; +static bool conf_testapi = false; +static uint16_t conf_nb_descriptors = 2048; + +/* Options statically defined */ +static uint32_t conf_nb_mbuf = 16384; +static uint16_t conf_port_id = 0; + +/* Other variables */ +static volatile bool force_quit; +static struct rte_mempool *mpool; +static struct rte_pktmbuf_extmem ext_mem; +struct rte_gpu_comm_list *comm_list_fwd[MAX_QUEUES]; +struct rte_ether_addr port_eth_addr; 
+static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, + .split_hdr_size = 0, + .offloads = 0, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + .offloads = 0, + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IP + }, + }, }; static void usage(const char *prog_name) { - printf("%s [EAL options] --\n", + printf("%s [EAL options] --\n" + " --help\n" + " --burst N: number of packets per rx burst\n" + " --gpu N: GPU ID to use\n" + " --memory N: external mempool memory type, 0 CPU, 1 GPU\n" + " --mbufd N: mbuf dataroom size\n" + " --testapi: test gpudev function\n" + " --queues N: number of RX queues\n", prog_name); } -static void +static int args_parse(int argc, char **argv) { char **argvopt; @@ -42,7 +112,19 @@ args_parse(int argc, char **argv) int opt_idx; static struct option lgopts[] = { - { "help", 0, 0, ARG_HELP}, + { "help", 0, 0, ARG_HELP}, + /* Packets per burst. */ + { "burst", 1, 0, ARG_BURST}, + /* GPU to use. */ + { "gpu", 1, 0, ARG_GPU}, + /* Type of memory for the mempool. */ + { "memory", 1, 0, ARG_MEMORY}, + /* Size of mbufs dataroom */ + { "mbufd", 1, 0, ARG_MBUFD}, + /* Number of RX queues */ + { "queues", 1, 0, ARG_QUEUES}, + /* Test only gpudev functions */ + { "testapi", 0, 0, ARG_TESTAPI}, /* End of options */ { 0, 0, 0, 0 } }; @@ -51,6 +133,24 @@ args_parse(int argc, char **argv) while ((opt = getopt_long(argc, argvopt, "", lgopts, &opt_idx)) != EOF) { switch (opt) { + case ARG_BURST: + conf_burst = (uint32_t) atoi(optarg); + break; + case ARG_GPU: + conf_gpu_id = (uint16_t) atoi(optarg); + break; + case ARG_MEMORY: + conf_mtype = (atoi(optarg) == 1 ? MEMORY_GPU : MEMORY_CPU); + break; + case ARG_MBUFD: + conf_mbuf_dataroom = (uint32_t) atoi(optarg); + break; + case ARG_QUEUES: + conf_queues = (uint32_t) atoi(optarg); + break; + case ARG_TESTAPI: + conf_testapi = (atoi(optarg) == 1 ? 
true : false); + break; case ARG_HELP: usage(argv[0]); break; @@ -60,6 +160,19 @@ args_parse(int argc, char **argv) break; } } + + if (conf_queues > MAX_QUEUES) { + fprintf(stderr, "Can't support more than %d queues\n", MAX_QUEUES); + return -1; + } + + if (conf_queues * 2 > rte_lcore_count()) { + fprintf(stderr, "Need to use at least %d cores to support %d RX/TX queues (EAL cores %d)\n", + conf_queues * 2, conf_queues, rte_lcore_count()); + return -1; + } + + return 0; } static int @@ -342,13 +455,130 @@ create_update_comm_list(uint16_t gpu_id) return -1; } +static void +signal_handler(int signum) +{ + if (signum == SIGINT || signum == SIGTERM) { + printf("\n\nSignal %d received, preparing to exit...\n", + signum); + force_quit = true; + } +} + +static int +rx_core(__rte_unused void *arg) +{ + uint32_t queue_id; + uint32_t nb_rx = 0; + int ret = 0; + int comm_list_item = 0; + struct rte_mbuf *rx_mbufs[RTE_GPU_COMM_LIST_PKTS_MAX]; + + queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2; + + if (queue_id > conf_queues) { + fprintf(stderr, "Please specify the right list of cores (%d cores) in EAL params to support %d queues.\n", + conf_queues*2, conf_queues); + RTE_GPU_VOLATILE(force_quit) = true; + return -1; + } + + printf("RX core started on queue %d.\n", queue_id); + + while (force_quit == false) { + + nb_rx = 0; + while (nb_rx < RTE_GPU_COMM_LIST_PKTS_MAX && + nb_rx < (conf_burst - PKT_GAP) && + force_quit == false) { + nb_rx += rte_eth_rx_burst(conf_port_id, queue_id, + &(rx_mbufs[nb_rx]), + (conf_burst - nb_rx)); + } + + ret = rte_gpu_comm_populate_list_pkts( + &(comm_list_fwd[queue_id][comm_list_item]), rx_mbufs, nb_rx); + if (ret) { + fprintf(stderr, "rte_gpu_comm_populate_list_pkts error %d.\n", ret); + return -1; + } + +#ifdef DEBUG_PRINT + printf("RX %d pkts from item %d\n", + comm_list_fwd[queue_id][comm_list_item].num_pkts, + comm_list_item); +#endif + + RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) = RTE_GPU_COMM_LIST_DONE; + + 
comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS; + } + + return 0; +} + +static int +tx_core(__rte_unused void *arg) +{ + uint32_t queue_id = 0; + uint32_t nb_tx = 0; + int ret = 0; + int comm_list_item = 0; + + queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2; + if (queue_id > conf_queues) { + fprintf(stderr, "Please specify the right list of cores (%d cores) in EAL params to support %d queues.\n", + conf_queues*2, conf_queues); + RTE_GPU_VOLATILE(force_quit) = true; + return -1; + } + printf("TX core started on queue %d.\n", queue_id); + + while (force_quit == false) { + +#ifdef DEBUG_PRINT + printf("Waiting on item %d\n", comm_list_item); +#endif + while (RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) != + RTE_GPU_COMM_LIST_DONE && force_quit == false); + + nb_tx = 0; + while (nb_tx < comm_list_fwd[queue_id][comm_list_item].num_pkts) { + nb_tx += rte_eth_tx_burst(conf_port_id, queue_id, + &(comm_list_fwd[queue_id][comm_list_item].mbufs[nb_tx]), + comm_list_fwd[queue_id][comm_list_item].num_pkts - nb_tx); + } + rte_wmb(); + +#ifdef DEBUG_PRINT + printf("TX %d/%d pkts from item %d\n", + nb_tx, comm_list_fwd[queue_id][comm_list_item].num_pkts, + comm_list_item); +#endif + ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[queue_id][comm_list_item])); + if (ret) { + fprintf(stderr, "rte_gpu_comm_cleanup_list error %d.\n", ret); + return -1; + } + + rte_mb(); + + comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS; + } + + return 0; +} + int main(int argc, char **argv) { - int ret; + int ret, core_id; int nb_gpus = 0; + int nb_ports = 0; int16_t gpu_id = 0; + uint32_t idx_q = 0; struct rte_gpu_info ginfo; + struct rte_eth_dev_info dev_info; /* Init EAL. 
*/ ret = rte_eal_init(argc, argv); @@ -356,8 +586,14 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "EAL init failed\n"); argc -= ret; argv += ret; - if (argc > 1) - args_parse(argc, argv); + if (argc > 1) { + ret = args_parse(argc, argv); + if (ret) { + fprintf(stderr, "Input args error.\n"); + goto exit; + } + } + argc -= ret; argv += ret; @@ -381,25 +617,228 @@ main(int argc, char **argv) if (nb_gpus == 0) { fprintf(stderr, "Need at least one GPU on the system to run the example\n"); - return EXIT_FAILURE; + goto exit; } - gpu_id = 0; + if (nb_gpus < conf_gpu_id) { + fprintf(stderr, "Not enough GPUs in the system (%d / %d).\n", nb_gpus, conf_gpu_id); + goto exit; + } - /** - * Memory tests - */ - alloc_gpu_memory(gpu_id); - register_cpu_memory(gpu_id); + if (conf_testapi == true) { + /* Memory tests */ + alloc_gpu_memory(gpu_id); + register_cpu_memory(gpu_id); - /** - * Communication items test - */ - create_update_comm_flag(gpu_id); - create_update_comm_list(gpu_id); + /* Communication items test */ + create_update_comm_flag(gpu_id); + create_update_comm_list(gpu_id); + + goto exit; + } + + force_quit = false; + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + nb_ports = rte_eth_dev_count_avail(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + + ret = rte_eth_dev_info_get(conf_port_id, &dev_info); + if (ret) { + fprintf(stderr, "rte_eth_dev_info_get failed with %d.\n", ret); + goto exit; + } + + /* Create external memory mempool. 
*/ + ext_mem.elt_size = conf_mbuf_dataroom + RTE_PKTMBUF_HEADROOM; + ext_mem.buf_len = RTE_ALIGN_CEIL(conf_nb_mbuf * ext_mem.elt_size, GPU_PAGE_SIZE); + + if (conf_mtype == MEMORY_CPU) { + ext_mem.buf_ptr = rte_malloc("extmem", ext_mem.buf_len, 0); + if (ext_mem.buf_ptr == NULL) { + fprintf(stderr, "Could not allocate CPU DPDK memory.\n"); + goto exit; + } + + ret = rte_gpu_mem_register(conf_gpu_id, ext_mem.buf_len, ext_mem.buf_ptr); + if (ret < 0) { + fprintf(stderr, + "rte_gpu_mem_register CPU memory returned error %d.\n", ret); + return -1; + } + } else { + ext_mem.buf_iova = RTE_BAD_IOVA; + + ext_mem.buf_ptr = rte_gpu_mem_alloc(conf_gpu_id, ext_mem.buf_len); + if (ext_mem.buf_ptr == NULL) { + fprintf(stderr, "Could not allocate GPU device memory.\n"); + goto exit; + } + + ret = rte_extmem_register(ext_mem.buf_ptr, ext_mem.buf_len, + NULL, ext_mem.buf_iova, GPU_PAGE_SIZE); + if (ret) { + fprintf(stderr, "Unable to register addr 0x%p, ret %d.\n", ext_mem.buf_ptr, ret); + goto exit; + } + } + + /* DMA map the external memory. */ + ret = rte_dev_dma_map(dev_info.device, ext_mem.buf_ptr, + ext_mem.buf_iova, ext_mem.buf_len); + if (ret) { + fprintf(stderr, "Could not DMA map EXT memory.\n"); + goto exit; + } + + /* Create external memory mempool. */ + mpool = rte_pktmbuf_pool_create_extbuf("payload_mpool", conf_nb_mbuf, + 0, 0, ext_mem.elt_size, + rte_socket_id(), &ext_mem, 1); + if (mpool == NULL) { + fprintf(stderr, "Could not create EXT memory mempool.\n"); + goto exit; + } + + /* Queues configuration. 
*/ + ret = rte_eth_dev_configure(conf_port_id, conf_queues, + conf_queues, &port_conf); + if (ret < 0) { + fprintf(stderr, + "Cannot configure device: err=%d, port=%u queues=%u\n", + ret, conf_port_id, conf_queues); + goto exit; + } + + ret = rte_eth_dev_adjust_nb_rx_tx_desc(conf_port_id, + &conf_nb_descriptors, &conf_nb_descriptors); + if (ret) { + fprintf(stderr, + "Cannot adjust number of descriptors: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + for (idx_q = 0; idx_q < conf_queues; idx_q++) { + + ret = rte_eth_rx_queue_setup(conf_port_id, idx_q, + conf_nb_descriptors, rte_lcore_to_socket_id(idx_q), + NULL, mpool); + + if (ret) { + fprintf(stderr, "rte_eth_rx_queue_setup: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + ret = rte_eth_tx_queue_setup(conf_port_id, idx_q, + conf_nb_descriptors, rte_lcore_to_socket_id(idx_q), NULL); + if (ret) { + fprintf(stderr, "rte_eth_tx_queue_setup: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + } + + rte_eth_macaddr_get(conf_port_id, &port_eth_addr); + + ret = rte_eth_dev_start(conf_port_id); + if (ret) { + fprintf(stderr, "rte_eth_dev_start: err=%d, port=%u\n", + ret, conf_port_id); + goto exit; + } + + printf("Port %d: %02x:%02x:%02x:%02x:%02x:%02x started!\n", + conf_port_id, + (uint8_t)port_eth_addr.addr_bytes[0], + (uint8_t)port_eth_addr.addr_bytes[1], + port_eth_addr.addr_bytes[2], + port_eth_addr.addr_bytes[3], + port_eth_addr.addr_bytes[4], + port_eth_addr.addr_bytes[5]); + + rte_eth_promiscuous_enable(conf_port_id); + + /* Create communication lists, one per queue. 
*/ + for (idx_q = 0; idx_q < MAX_QUEUES; idx_q++) { + comm_list_fwd[idx_q] = NULL; + + if (idx_q < conf_queues) { + comm_list_fwd[idx_q] = rte_gpu_comm_create_list(conf_gpu_id, + NUM_COMM_ITEMS); + if (comm_list_fwd[idx_q] == NULL) { + fprintf(stderr, "comm_create_list returned error %d\n", + ret); + goto exit; + } + ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[idx_q][0])); + if (ret < 0) { + fprintf(stderr, "comm_cleanup_list returned error %d\n", + ret); + goto exit; + } + } + } + + core_id = 0; + for (idx_q = 0; idx_q < conf_queues; idx_q++) { + core_id = rte_get_next_lcore(core_id, 1, 0); + rte_eal_remote_launch(tx_core, NULL, core_id); + + core_id = rte_get_next_lcore(core_id, 1, 0); + rte_eal_remote_launch(rx_core, NULL, core_id); + } + + core_id = 0; + RTE_LCORE_FOREACH_WORKER(core_id) { + if (rte_eal_wait_lcore(core_id) < 0) { + fprintf(stderr, "bad exit for core %d.\n", + core_id); + break; + } + } + + force_quit = true; + + ret = rte_dev_dma_unmap(dev_info.device, (void *)ext_mem.buf_ptr, + RTE_BAD_IOVA, ext_mem.buf_len); + if (ret) { + fprintf(stderr, + "rte_dev_dma_unmap 0x%p -> %d (rte_errno = %d)\n", + (uint8_t *)ext_mem.buf_ptr, ret, rte_errno); + goto exit; + } + + if (conf_mtype == MEMORY_CPU) { + ret = rte_gpu_mem_unregister(conf_gpu_id, ext_mem.buf_ptr); + if (ret < 0) { + fprintf(stderr, "rte_gpu_mem_unregister returned error %d\n", ret); + goto exit; + } + + rte_free(ext_mem.buf_ptr); + + } else { + + ret = rte_extmem_unregister(ext_mem.buf_ptr, ext_mem.buf_len); + if (ret) { + fprintf(stderr, "rte_extmem_unregister failed with %d.\n", ret); + goto exit; + } + + rte_gpu_mem_free(conf_gpu_id, (void *)ext_mem.buf_ptr); + } + + rte_eth_dev_stop(conf_port_id); + rte_eth_dev_close(conf_port_id); +exit: /* clean up the EAL */ rte_eal_cleanup(); + printf("Bye...\n"); return EXIT_SUCCESS; } -- 2.17.1