From: Tomasz Jozwiak <tomaszx.jozw...@intel.com> This patch adds template functions for the multi-core performance version of compress-perf-tool and updates the release notes for 19.08.
Signed-off-by: Tomasz Jozwiak <tjozwia...@gmail.com> Signed-off-by: Tomasz Jozwiak <tomaszx.jozw...@intel.com> --- app/test-compress-perf/Makefile | 3 +- app/test-compress-perf/comp_perf.h | 50 ++ app/test-compress-perf/comp_perf_options.h | 44 +- app/test-compress-perf/comp_perf_options_parse.c | 24 +- app/test-compress-perf/comp_perf_test_common.c | 284 +++++++++++ app/test-compress-perf/comp_perf_test_common.h | 41 ++ app/test-compress-perf/main.c | 599 +++++++++-------------- app/test-compress-perf/meson.build | 3 +- doc/guides/rel_notes/release_19_08.rst | 3 + 9 files changed, 649 insertions(+), 402 deletions(-) create mode 100644 app/test-compress-perf/comp_perf.h create mode 100644 app/test-compress-perf/comp_perf_test_common.c create mode 100644 app/test-compress-perf/comp_perf_test_common.h diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile index d20e17e..de74129 100644 --- a/app/test-compress-perf/Makefile +++ b/app/test-compress-perf/Makefile @@ -12,7 +12,6 @@ CFLAGS += -O3 # all source are stored in SRCS-y SRCS-y := main.c SRCS-y += comp_perf_options_parse.c -SRCS-y += comp_perf_test_verify.c -SRCS-y += comp_perf_test_benchmark.c +SRCS-y += comp_perf_test_common.c include $(RTE_SDK)/mk/rte.app.mk diff --git a/app/test-compress-perf/comp_perf.h b/app/test-compress-perf/comp_perf.h new file mode 100644 index 0000000..57289b0 --- /dev/null +++ b/app/test-compress-perf/comp_perf.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#ifndef _COMP_PERF_ +#define _COMP_PERF_ + +#include <rte_mempool.h> + +struct comp_test_data; + +typedef void *(*cperf_constructor_t)( + uint8_t dev_id, + uint16_t qp_id, + struct comp_test_data *options); + +typedef int (*cperf_runner_t)(void *test_ctx); +typedef void (*cperf_destructor_t)(void *test_ctx); + +struct cperf_test { + cperf_constructor_t constructor; + cperf_runner_t runner; + cperf_destructor_t destructor; +}; + +/* Needed for weak 
functions*/ + +void * +cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused, + uint16_t qp_id __rte_unused, + struct comp_test_data *options __rte_unused); + +void +cperf_benchmark_test_destructor(void *arg __rte_unused); + +int +cperf_benchmark_test_runner(void *test_ctx __rte_unused); + +void * +cperf_verify_test_constructor(uint8_t dev_id __rte_unused, + uint16_t qp_id __rte_unused, + struct comp_test_data *options __rte_unused); + +void +cperf_verify_test_destructor(void *arg __rte_unused); + +int +cperf_verify_test_runner(void *test_ctx __rte_unused); + +#endif /* _COMP_PERF_ */ diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h index f87751d..0aa29a5 100644 --- a/app/test-compress-perf/comp_perf_options.h +++ b/app/test-compress-perf/comp_perf_options.h @@ -13,6 +13,23 @@ #define MAX_MBUF_DATA_SIZE (UINT16_MAX - RTE_PKTMBUF_HEADROOM) #define MAX_SEG_SIZE ((int)(MAX_MBUF_DATA_SIZE / EXPANSE_RATIO)) +extern const char *cperf_test_type_strs[]; + +/* Cleanup state machine */ +enum cleanup_st { + ST_CLEAR = 0, + ST_TEST_DATA, + ST_COMPDEV, + ST_INPUT_DATA, + ST_MEMORY_ALLOC, + ST_DURING_TEST +}; + +enum cperf_perf_test_type { + CPERF_TEST_TYPE_BENCHMARK, + CPERF_TEST_TYPE_VERIFY +}; + enum comp_operation { COMPRESS_ONLY, DECOMPRESS_ONLY, @@ -30,37 +47,26 @@ struct range_list { struct comp_test_data { char driver_name[64]; char input_file[64]; - struct rte_mbuf **comp_bufs; - struct rte_mbuf **decomp_bufs; - uint32_t total_bufs; + enum cperf_perf_test_type test; + uint8_t *input_data; size_t input_data_sz; - uint8_t *compressed_data; - uint8_t *decompressed_data; - struct rte_mempool *comp_buf_pool; - struct rte_mempool *decomp_buf_pool; - struct rte_mempool *op_pool; - int8_t cdev_id; + uint16_t nb_qps; uint16_t seg_sz; uint16_t out_seg_sz; uint16_t burst_sz; uint32_t pool_sz; uint32_t num_iter; uint16_t max_sgl_segs; + enum rte_comp_huffman huffman_enc; enum comp_operation test_op; int window_sz; - struct 
range_list level; - /* Store TSC duration for all levels (including level 0) */ - uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1]; - uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1]; - size_t comp_data_sz; - size_t decomp_data_sz; + struct range_list level_lst; + uint8_t level; + double ratio; - double comp_gbps; - double decomp_gbps; - double comp_tsc_byte; - double decomp_tsc_byte; + enum cleanup_st cleanup; }; int diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c index 2fb6fb4..bc4b98a 100644 --- a/app/test-compress-perf/comp_perf_options_parse.c +++ b/app/test-compress-perf/comp_perf_options_parse.c @@ -466,19 +466,20 @@ parse_level(struct comp_test_data *test_data, const char *arg) * Try parsing the argument as a range, if it fails, * arse it as a list */ - if (parse_range(arg, &test_data->level.min, &test_data->level.max, - &test_data->level.inc) < 0) { - ret = parse_list(arg, test_data->level.list, - &test_data->level.min, - &test_data->level.max); + if (parse_range(arg, &test_data->level_lst.min, + &test_data->level_lst.max, + &test_data->level_lst.inc) < 0) { + ret = parse_list(arg, test_data->level_lst.list, + &test_data->level_lst.min, + &test_data->level_lst.max); if (ret < 0) { RTE_LOG(ERR, USER1, "Failed to parse compression level/s\n"); return -1; } - test_data->level.count = ret; + test_data->level_lst.count = ret; - if (test_data->level.max > RTE_COMP_LEVEL_MAX) { + if (test_data->level_lst.max > RTE_COMP_LEVEL_MAX) { RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n", RTE_COMP_LEVEL_MAX); return -1; @@ -498,7 +499,6 @@ struct long_opt_parser { }; static struct option lgopts[] = { - { CPERF_DRIVER_NAME, required_argument, 0, 0 }, { CPERF_TEST_FILE, required_argument, 0, 0 }, { CPERF_SEG_SIZE, required_argument, 0, 0 }, @@ -572,7 +572,6 @@ comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv) void comp_perf_options_default(struct comp_test_data 
*test_data) { - test_data->cdev_id = -1; test_data->seg_sz = 2048; test_data->burst_sz = 32; test_data->pool_sz = 8192; @@ -581,9 +580,10 @@ comp_perf_options_default(struct comp_test_data *test_data) test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC; test_data->test_op = COMPRESS_DECOMPRESS; test_data->window_sz = -1; - test_data->level.min = 1; - test_data->level.max = 9; - test_data->level.inc = 1; + test_data->level_lst.min = 1; + test_data->level_lst.max = 9; + test_data->level_lst.inc = 1; + test_data->test = CPERF_TEST_TYPE_BENCHMARK; } int diff --git a/app/test-compress-perf/comp_perf_test_common.c b/app/test-compress-perf/comp_perf_test_common.c new file mode 100644 index 0000000..dc9d0b0 --- /dev/null +++ b/app/test-compress-perf/comp_perf_test_common.c @@ -0,0 +1,284 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#include <rte_malloc.h> +#include <rte_eal.h> +#include <rte_log.h> +#include <rte_compressdev.h> + +#include "comp_perf_options.h" +#include "comp_perf_test_verify.h" +#include "comp_perf_test_benchmark.h" +#include "comp_perf.h" +#include "comp_perf_test_common.h" + +#define DIV_CEIL(a, b) ((a) / (b) + ((a) % (b) != 0)) + +int +param_range_check(uint16_t size, const struct rte_param_log2_range *range) +{ + unsigned int next_size; + + /* Check lower/upper bounds */ + if (size < range->min) + return -1; + + if (size > range->max) + return -1; + + /* If range is actually only one value, size is correct */ + if (range->increment == 0) + return 0; + + /* Check if value is one of the supported sizes */ + for (next_size = range->min; next_size <= range->max; + next_size += range->increment) + if (size == next_size) + return 0; + + return -1; +} + +static uint32_t +find_buf_size(uint32_t input_size) +{ + uint32_t i; + + /* From performance point of view the buffer size should be a + * power of 2 but also should be enough to store incompressible data + */ + + /* We're looking for nearest power of 2 buffer 
size, which is greather + * than input_size + */ + uint32_t size = + !input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1); + + for (i = UINT16_MAX + 1; !(i & size); i >>= 1) + ; + + return i > ((UINT16_MAX + 1) >> 1) + ? (uint32_t)((float)input_size * EXPANSE_RATIO) + : i; +} + +void +comp_perf_free_memory(struct cperf_mem_resources *mem) +{ + uint32_t i; + + for (i = 0; i < mem->total_bufs; i++) { + rte_pktmbuf_free(mem->comp_bufs[i]); + rte_pktmbuf_free(mem->decomp_bufs[i]); + } + + rte_free(mem->decomp_bufs); + rte_free(mem->comp_bufs); + rte_free(mem->decompressed_data); + rte_free(mem->compressed_data); + rte_mempool_free(mem->op_pool); + rte_mempool_free(mem->decomp_buf_pool); + rte_mempool_free(mem->comp_buf_pool); +} + +int +comp_perf_allocate_memory(struct comp_test_data *test_data, + struct cperf_mem_resources *mem) +{ + test_data->out_seg_sz = find_buf_size(test_data->seg_sz); + /* Number of segments for input and output + * (compression and decompression) + */ + uint32_t total_segs = DIV_CEIL(test_data->input_data_sz, + test_data->seg_sz); + char pool_name[32] = ""; + + snprintf(pool_name, sizeof(pool_name), "comp_buf_pool_%u_qp_%u", + mem->dev_id, mem->qp_id); + mem->comp_buf_pool = rte_pktmbuf_pool_create(pool_name, + total_segs, + 0, 0, + test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM, + rte_socket_id()); + if (mem->comp_buf_pool == NULL) { + RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n"); + return -1; + } + + snprintf(pool_name, sizeof(pool_name), "decomp_buf_pool_%u_qp_%u", + mem->dev_id, mem->qp_id); + mem->decomp_buf_pool = rte_pktmbuf_pool_create(pool_name, + total_segs, + 0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM, + rte_socket_id()); + if (mem->decomp_buf_pool == NULL) { + RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n"); + return -1; + } + + mem->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs); + + snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u", + mem->dev_id, mem->qp_id); + mem->op_pool = 
rte_comp_op_pool_create(pool_name, + mem->total_bufs, + 0, 0, rte_socket_id()); + if (mem->op_pool == NULL) { + RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n"); + return -1; + } + + /* + * Compressed data might be a bit larger than input data, + * if data cannot be compressed + */ + mem->compressed_data = rte_zmalloc_socket(NULL, + test_data->input_data_sz * EXPANSE_RATIO + + MIN_COMPRESSED_BUF_SIZE, 0, + rte_socket_id()); + if (mem->compressed_data == NULL) { + RTE_LOG(ERR, USER1, "Memory to hold the data from the input " + "file could not be allocated\n"); + return -1; + } + + mem->decompressed_data = rte_zmalloc_socket(NULL, + test_data->input_data_sz, 0, + rte_socket_id()); + if (mem->decompressed_data == NULL) { + RTE_LOG(ERR, USER1, "Memory to hold the data from the input " + "file could not be allocated\n"); + return -1; + } + + mem->comp_bufs = rte_zmalloc_socket(NULL, + mem->total_bufs * sizeof(struct rte_mbuf *), + 0, rte_socket_id()); + if (mem->comp_bufs == NULL) { + RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs" + " could not be allocated\n"); + return -1; + } + + mem->decomp_bufs = rte_zmalloc_socket(NULL, + mem->total_bufs * sizeof(struct rte_mbuf *), + 0, rte_socket_id()); + if (mem->decomp_bufs == NULL) { + RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs" + " could not be allocated\n"); + return -1; + } + return 0; +} + +int +prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem) +{ + uint32_t remaining_data = test_data->input_data_sz; + uint8_t *input_data_ptr = test_data->input_data; + size_t data_sz; + uint8_t *data_addr; + uint32_t i, j; + + for (i = 0; i < mem->total_bufs; i++) { + /* Allocate data in input mbuf and copy data from input file */ + mem->decomp_bufs[i] = + rte_pktmbuf_alloc(mem->decomp_buf_pool); + if (mem->decomp_bufs[i] == NULL) { + RTE_LOG(ERR, USER1, "Could not allocate mbuf\n"); + return -1; + } + + data_sz = RTE_MIN(remaining_data, test_data->seg_sz); + 
data_addr = (uint8_t *) rte_pktmbuf_append( + mem->decomp_bufs[i], data_sz); + if (data_addr == NULL) { + RTE_LOG(ERR, USER1, "Could not append data\n"); + return -1; + } + rte_memcpy(data_addr, input_data_ptr, data_sz); + + input_data_ptr += data_sz; + remaining_data -= data_sz; + + /* Already one segment in the mbuf */ + uint16_t segs_per_mbuf = 1; + + /* Chain mbufs if needed for input mbufs */ + while (segs_per_mbuf < test_data->max_sgl_segs + && remaining_data > 0) { + struct rte_mbuf *next_seg = + rte_pktmbuf_alloc(mem->decomp_buf_pool); + + if (next_seg == NULL) { + RTE_LOG(ERR, USER1, + "Could not allocate mbuf\n"); + return -1; + } + + data_sz = RTE_MIN(remaining_data, test_data->seg_sz); + data_addr = (uint8_t *)rte_pktmbuf_append(next_seg, + data_sz); + + if (data_addr == NULL) { + RTE_LOG(ERR, USER1, "Could not append data\n"); + return -1; + } + + rte_memcpy(data_addr, input_data_ptr, data_sz); + input_data_ptr += data_sz; + remaining_data -= data_sz; + + if (rte_pktmbuf_chain(mem->decomp_bufs[i], + next_seg) < 0) { + RTE_LOG(ERR, USER1, "Could not chain mbufs\n"); + return -1; + } + segs_per_mbuf++; + } + + /* Allocate data in output mbuf */ + mem->comp_bufs[i] = + rte_pktmbuf_alloc(mem->comp_buf_pool); + if (mem->comp_bufs[i] == NULL) { + RTE_LOG(ERR, USER1, "Could not allocate mbuf\n"); + return -1; + } + data_addr = (uint8_t *) rte_pktmbuf_append( + mem->comp_bufs[i], + test_data->out_seg_sz); + if (data_addr == NULL) { + RTE_LOG(ERR, USER1, "Could not append data\n"); + return -1; + } + + /* Chain mbufs if needed for output mbufs */ + for (j = 1; j < segs_per_mbuf; j++) { + struct rte_mbuf *next_seg = + rte_pktmbuf_alloc(mem->comp_buf_pool); + + if (next_seg == NULL) { + RTE_LOG(ERR, USER1, + "Could not allocate mbuf\n"); + return -1; + } + + data_addr = (uint8_t *)rte_pktmbuf_append(next_seg, + test_data->out_seg_sz); + + if (data_addr == NULL) { + RTE_LOG(ERR, USER1, "Could not append data\n"); + return -1; + } + + if 
(rte_pktmbuf_chain(mem->comp_bufs[i], + next_seg) < 0) { + RTE_LOG(ERR, USER1, "Could not chain mbufs\n"); + return -1; + } + } + } + + return 0; +} diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h new file mode 100644 index 0000000..9c11e3a --- /dev/null +++ b/app/test-compress-perf/comp_perf_test_common.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#ifndef _COMP_PERF_TEST_COMMON_H_ +#define _COMP_PERF_TEST_COMMON_H_ + +#include <stdint.h> + +#include <rte_mempool.h> + +struct cperf_mem_resources { + uint8_t dev_id; + uint16_t qp_id; + uint8_t lcore_id; + uint32_t total_bufs; + uint8_t *compressed_data; + uint8_t *decompressed_data; + + struct rte_mbuf **comp_bufs; + struct rte_mbuf **decomp_bufs; + + struct rte_mempool *comp_buf_pool; + struct rte_mempool *decomp_buf_pool; + struct rte_mempool *op_pool; +}; + +int +param_range_check(uint16_t size, const struct rte_param_log2_range *range); + +void +comp_perf_free_memory(struct cperf_mem_resources *mem); + +int +comp_perf_allocate_memory(struct comp_test_data *test_data, + struct cperf_mem_resources *mem); + +int +prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem); + +#endif /* _COMP_PERF_TEST_COMMON_H_ */ diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c index 7b9ee74..3a3da4c 100644 --- a/app/test-compress-perf/main.c +++ b/app/test-compress-perf/main.c @@ -8,56 +8,38 @@ #include <rte_compressdev.h> #include "comp_perf_options.h" -#include "comp_perf_test_verify.h" -#include "comp_perf_test_benchmark.h" +#include "comp_perf.h" +#include "comp_perf_test_common.h" #define NUM_MAX_XFORMS 16 #define NUM_MAX_INFLIGHT_OPS 512 -#define DIV_CEIL(a, b) ((a) / (b) + ((a) % (b) != 0)) - -/* Cleanup state machine */ -static enum cleanup_st { - ST_CLEAR = 0, - ST_TEST_DATA, - ST_COMPDEV, - ST_INPUT_DATA, - ST_MEMORY_ALLOC, - ST_PREPARE_BUF, - 
ST_DURING_TEST -} cleanup = ST_CLEAR; - -static int -param_range_check(uint16_t size, const struct rte_param_log2_range *range) -{ - unsigned int next_size; - - /* Check lower/upper bounds */ - if (size < range->min) - return -1; - - if (size > range->max) - return -1; - - /* If range is actually only one value, size is correct */ - if (range->increment == 0) - return 0; - - /* Check if value is one of the supported sizes */ - for (next_size = range->min; next_size <= range->max; - next_size += range->increment) - if (size == next_size) - return 0; - - return -1; -} +__extension__ +const char *cperf_test_type_strs[] = { + [CPERF_TEST_TYPE_BENCHMARK] = "benchmark", + [CPERF_TEST_TYPE_VERIFY] = "verify" +}; + +__extension__ +static const struct cperf_test cperf_testmap[] = { + [CPERF_TEST_TYPE_BENCHMARK] = { + cperf_benchmark_test_constructor, + cperf_benchmark_test_runner, + cperf_benchmark_test_destructor + }, + [CPERF_TEST_TYPE_VERIFY] = { + cperf_verify_test_constructor, + cperf_verify_test_runner, + cperf_verify_test_destructor + } +}; static int -comp_perf_check_capabilities(struct comp_test_data *test_data) +comp_perf_check_capabilities(struct comp_test_data *test_data, uint8_t cdev_id) { const struct rte_compressdev_capabilities *cap; - cap = rte_compressdev_capability_get(test_data->cdev_id, + cap = rte_compressdev_capability_get(cdev_id, RTE_COMP_ALGO_DEFLATE); if (cap == NULL) { @@ -105,7 +87,7 @@ comp_perf_check_capabilities(struct comp_test_data *test_data) } /* Level 0 support */ - if (test_data->level.min == 0 && + if (test_data->level_lst.min == 0 && (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) { RTE_LOG(ERR, USER1, "Compress device does not support " "level 0 (no compression)\n"); @@ -115,110 +97,108 @@ comp_perf_check_capabilities(struct comp_test_data *test_data) return 0; } -static uint32_t -find_buf_size(uint32_t input_size) +static int +comp_perf_initialize_compressdev(struct comp_test_data *test_data, + uint8_t *enabled_cdevs) { - 
uint32_t i; + uint8_t enabled_cdev_count, nb_lcores, cdev_id; + unsigned int i, j; + int ret; - /* From performance point of view the buffer size should be a - * power of 2 but also should be enough to store incompressible data - */ + enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name, + enabled_cdevs, RTE_COMPRESS_MAX_DEVS); + if (enabled_cdev_count == 0) { + RTE_LOG(ERR, USER1, "No compress devices type %s available\n", + test_data->driver_name); + return -EINVAL; + } - /* We're looking for nearest power of 2 buffer size, which is greather - * than input_size + nb_lcores = rte_lcore_count() - 1; + /* + * Use fewer devices, + * if there are more available than cores. */ - uint32_t size = - !input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1); - - for (i = UINT16_MAX + 1; !(i & size); i >>= 1) - ; - - return i > ((UINT16_MAX + 1) >> 1) - ? (uint32_t)((float)input_size * EXPANSE_RATIO) - : i; -} - -static int -comp_perf_allocate_memory(struct comp_test_data *test_data) -{ + if (enabled_cdev_count > nb_lcores) { + enabled_cdev_count = nb_lcores; + RTE_LOG(INFO, USER1, + " There's more available devices than cores!" + " The number of devices has been aligned to %d cores\n", + nb_lcores); + } - test_data->out_seg_sz = find_buf_size(test_data->seg_sz); - /* Number of segments for input and output - * (compression and decompression) + /* + * Calculate number of needed queue pairs, based on the amount + * of available number of logical cores and compression devices. + * For instance, if there are 4 cores and 2 compression devices, + * 2 queue pairs will be set up per device. + * One queue pair per one core. 
+ * if e.g.: there're 3 cores and 2 compression devices, + * 2 queue pairs will be set up per device but one queue pair + * will left unused in the last one device */ - uint32_t total_segs = DIV_CEIL(test_data->input_data_sz, - test_data->seg_sz); - test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool", - total_segs, - 0, 0, - test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM, - rte_socket_id()); - if (test_data->comp_buf_pool == NULL) { - RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n"); - return -1; - } + test_data->nb_qps = (nb_lcores % enabled_cdev_count) ? + (nb_lcores / enabled_cdev_count) + 1 : + nb_lcores / enabled_cdev_count; - cleanup = ST_MEMORY_ALLOC; - test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool", - total_segs, - 0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM, - rte_socket_id()); - if (test_data->decomp_buf_pool == NULL) { - RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n"); - return -1; - } + for (i = 0; i < enabled_cdev_count && + i < RTE_COMPRESS_MAX_DEVS; i++, + nb_lcores -= test_data->nb_qps) { + cdev_id = enabled_cdevs[i]; - test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs); + struct rte_compressdev_info cdev_info; + uint8_t socket_id = rte_compressdev_socket_id(cdev_id); - test_data->op_pool = rte_comp_op_pool_create("op_pool", - test_data->total_bufs, - 0, 0, rte_socket_id()); - if (test_data->op_pool == NULL) { - RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n"); - return -1; - } + rte_compressdev_info_get(cdev_id, &cdev_info); + if (cdev_info.max_nb_queue_pairs && + test_data->nb_qps > cdev_info.max_nb_queue_pairs) { + RTE_LOG(ERR, USER1, + "Number of needed queue pairs is higher " + "than the maximum number of queue pairs " + "per device.\n"); + RTE_LOG(ERR, USER1, + "Lower the number of cores or increase " + "the number of crypto devices\n"); + return -EINVAL; + } - /* - * Compressed data might be a bit larger than input data, - * if data cannot be 
compressed - */ - test_data->compressed_data = rte_zmalloc_socket(NULL, - test_data->input_data_sz * EXPANSE_RATIO - + MIN_COMPRESSED_BUF_SIZE, 0, - rte_socket_id()); - if (test_data->compressed_data == NULL) { - RTE_LOG(ERR, USER1, "Memory to hold the data from the input " - "file could not be allocated\n"); - return -1; - } + if (comp_perf_check_capabilities(test_data, cdev_id) < 0) + return -EINVAL; + + /* Configure compressdev */ + struct rte_compressdev_config config = { + .socket_id = socket_id, + .nb_queue_pairs = nb_lcores > test_data->nb_qps + ? test_data->nb_qps : nb_lcores, + .max_nb_priv_xforms = NUM_MAX_XFORMS, + .max_nb_streams = 0 + }; + + if (rte_compressdev_configure(cdev_id, &config) < 0) { + RTE_LOG(ERR, USER1, "Device configuration failed\n"); + return -EINVAL; + } - test_data->decompressed_data = rte_zmalloc_socket(NULL, - test_data->input_data_sz, 0, - rte_socket_id()); - if (test_data->decompressed_data == NULL) { - RTE_LOG(ERR, USER1, "Memory to hold the data from the input " - "file could not be allocated\n"); - return -1; - } + for (j = 0; j < test_data->nb_qps; j++) { + ret = rte_compressdev_queue_pair_setup(cdev_id, j, + NUM_MAX_INFLIGHT_OPS, socket_id); + if (ret < 0) { + RTE_LOG(ERR, USER1, + "Failed to setup queue pair %u on compressdev %u", + j, cdev_id); + return -EINVAL; + } + } - test_data->comp_bufs = rte_zmalloc_socket(NULL, - test_data->total_bufs * sizeof(struct rte_mbuf *), - 0, rte_socket_id()); - if (test_data->comp_bufs == NULL) { - RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs" - " could not be allocated\n"); - return -1; + ret = rte_compressdev_start(cdev_id); + if (ret < 0) { + RTE_LOG(ERR, USER1, + "Failed to start device %u: error %d\n", + cdev_id, ret); + return -EPERM; + } } - test_data->decomp_bufs = rte_zmalloc_socket(NULL, - test_data->total_bufs * sizeof(struct rte_mbuf *), - 0, rte_socket_id()); - if (test_data->decomp_bufs == NULL) { - RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs" - 
" could not be allocated\n"); - return -1; - } - return 0; + return enabled_cdev_count; } static int @@ -295,187 +275,18 @@ comp_perf_dump_input_data(struct comp_test_data *test_data) return ret; } -static int -comp_perf_initialize_compressdev(struct comp_test_data *test_data) -{ - uint8_t enabled_cdev_count; - uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS]; - - enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name, - enabled_cdevs, RTE_COMPRESS_MAX_DEVS); - if (enabled_cdev_count == 0) { - RTE_LOG(ERR, USER1, "No compress devices type %s available\n", - test_data->driver_name); - return -EINVAL; - } - - if (enabled_cdev_count > 1) - RTE_LOG(INFO, USER1, - "Only the first compress device will be used\n"); - - test_data->cdev_id = enabled_cdevs[0]; - - if (comp_perf_check_capabilities(test_data) < 0) - return -1; - - /* Configure compressdev (one device, one queue pair) */ - struct rte_compressdev_config config = { - .socket_id = rte_socket_id(), - .nb_queue_pairs = 1, - .max_nb_priv_xforms = NUM_MAX_XFORMS, - .max_nb_streams = 0 - }; - - if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) { - RTE_LOG(ERR, USER1, "Device configuration failed\n"); - return -1; - } - - if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0, - NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) { - RTE_LOG(ERR, USER1, "Queue pair setup failed\n"); - return -1; - } - - if (rte_compressdev_start(test_data->cdev_id) < 0) { - RTE_LOG(ERR, USER1, "Device could not be started\n"); - return -1; - } - - return 0; -} - -static int -prepare_bufs(struct comp_test_data *test_data) -{ - uint32_t remaining_data = test_data->input_data_sz; - uint8_t *input_data_ptr = test_data->input_data; - size_t data_sz; - uint8_t *data_addr; - uint32_t i, j; - - for (i = 0; i < test_data->total_bufs; i++) { - /* Allocate data in input mbuf and copy data from input file */ - test_data->decomp_bufs[i] = - rte_pktmbuf_alloc(test_data->decomp_buf_pool); - if (test_data->decomp_bufs[i] == NULL) 
{ - RTE_LOG(ERR, USER1, "Could not allocate mbuf\n"); - return -1; - } - - cleanup = ST_PREPARE_BUF; - data_sz = RTE_MIN(remaining_data, test_data->seg_sz); - data_addr = (uint8_t *) rte_pktmbuf_append( - test_data->decomp_bufs[i], data_sz); - if (data_addr == NULL) { - RTE_LOG(ERR, USER1, "Could not append data\n"); - return -1; - } - rte_memcpy(data_addr, input_data_ptr, data_sz); - - input_data_ptr += data_sz; - remaining_data -= data_sz; - - /* Already one segment in the mbuf */ - uint16_t segs_per_mbuf = 1; - - /* Chain mbufs if needed for input mbufs */ - while (segs_per_mbuf < test_data->max_sgl_segs - && remaining_data > 0) { - struct rte_mbuf *next_seg = - rte_pktmbuf_alloc(test_data->decomp_buf_pool); - - if (next_seg == NULL) { - RTE_LOG(ERR, USER1, - "Could not allocate mbuf\n"); - return -1; - } - - data_sz = RTE_MIN(remaining_data, test_data->seg_sz); - data_addr = (uint8_t *)rte_pktmbuf_append(next_seg, - data_sz); - - if (data_addr == NULL) { - RTE_LOG(ERR, USER1, "Could not append data\n"); - return -1; - } - - rte_memcpy(data_addr, input_data_ptr, data_sz); - input_data_ptr += data_sz; - remaining_data -= data_sz; - - if (rte_pktmbuf_chain(test_data->decomp_bufs[i], - next_seg) < 0) { - RTE_LOG(ERR, USER1, "Could not chain mbufs\n"); - return -1; - } - segs_per_mbuf++; - } - - /* Allocate data in output mbuf */ - test_data->comp_bufs[i] = - rte_pktmbuf_alloc(test_data->comp_buf_pool); - if (test_data->comp_bufs[i] == NULL) { - RTE_LOG(ERR, USER1, "Could not allocate mbuf\n"); - return -1; - } - data_addr = (uint8_t *) rte_pktmbuf_append( - test_data->comp_bufs[i], - test_data->out_seg_sz); - if (data_addr == NULL) { - RTE_LOG(ERR, USER1, "Could not append data\n"); - return -1; - } - - /* Chain mbufs if needed for output mbufs */ - for (j = 1; j < segs_per_mbuf; j++) { - struct rte_mbuf *next_seg = - rte_pktmbuf_alloc(test_data->comp_buf_pool); - - if (next_seg == NULL) { - RTE_LOG(ERR, USER1, - "Could not allocate mbuf\n"); - return -1; - } - - 
data_addr = (uint8_t *)rte_pktmbuf_append(next_seg, - test_data->out_seg_sz); - - if (data_addr == NULL) { - RTE_LOG(ERR, USER1, "Could not append data\n"); - return -1; - } - - if (rte_pktmbuf_chain(test_data->comp_bufs[i], - next_seg) < 0) { - RTE_LOG(ERR, USER1, "Could not chain mbufs\n"); - return -1; - } - } - } - - return 0; -} - -static void -free_bufs(struct comp_test_data *test_data) -{ - uint32_t i; - - for (i = 0; i < test_data->total_bufs; i++) { - rte_pktmbuf_free(test_data->comp_bufs[i]); - rte_pktmbuf_free(test_data->decomp_bufs[i]); - } -} - - - int main(int argc, char **argv) { - uint8_t level, level_idx = 0; + uint8_t level_idx = 0; int ret, i; struct comp_test_data *test_data; + void *ctx[RTE_MAX_LCORE] = {}; + uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS]; + int nb_compressdevs = 0; + uint16_t total_nb_qps = 0; + uint8_t cdev_id; + uint32_t lcore_id; /* Initialise DPDK EAL */ ret = rte_eal_init(argc, argv); @@ -492,7 +303,7 @@ main(int argc, char **argv) rte_socket_id()); ret = EXIT_SUCCESS; - cleanup = ST_TEST_DATA; + test_data->cleanup = ST_TEST_DATA; comp_perf_options_default(test_data); if (comp_perf_options_parse(test_data, argc, argv) < 0) { @@ -507,99 +318,112 @@ main(int argc, char **argv) goto end; } - if (comp_perf_initialize_compressdev(test_data) < 0) { - ret = EXIT_FAILURE; - goto end; - } + nb_compressdevs = + comp_perf_initialize_compressdev(test_data, enabled_cdevs); - cleanup = ST_COMPDEV; - if (comp_perf_dump_input_data(test_data) < 0) { + if (nb_compressdevs < 1) { ret = EXIT_FAILURE; goto end; } - cleanup = ST_INPUT_DATA; - if (comp_perf_allocate_memory(test_data) < 0) { + test_data->cleanup = ST_COMPDEV; + if (comp_perf_dump_input_data(test_data) < 0) { ret = EXIT_FAILURE; goto end; } - if (prepare_bufs(test_data) < 0) { - ret = EXIT_FAILURE; - goto end; - } + test_data->cleanup = ST_INPUT_DATA; - if (test_data->level.inc != 0) - level = test_data->level.min; + if (test_data->level_lst.inc != 0) + test_data->level = 
test_data->level_lst.min; else - level = test_data->level.list[0]; + test_data->level = test_data->level_lst.list[0]; printf("App uses socket: %u\n", rte_socket_id()); - printf("Driver uses socket: %u\n", - rte_compressdev_socket_id(test_data->cdev_id)); printf("Burst size = %u\n", test_data->burst_sz); printf("File size = %zu\n", test_data->input_data_sz); - printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n", - "Level", "Comp size", "Comp ratio [%]", - "Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]", - "Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]"); + test_data->cleanup = ST_DURING_TEST; + total_nb_qps = nb_compressdevs * test_data->nb_qps; - cleanup = ST_DURING_TEST; - while (level <= test_data->level.max) { + i = 0; + uint8_t qp_id = 0, cdev_index = 0; - /* - * Run a first iteration, to verify compression and - * get the compression ratio for the level - */ - if (cperf_verification(test_data, level) != EXIT_SUCCESS) - break; + RTE_LCORE_FOREACH_SLAVE(lcore_id) { - /* - * Run benchmarking test - */ - if (cperf_benchmark(test_data, level) != EXIT_SUCCESS) + if (i == total_nb_qps) break; - printf("%6u%12zu%17.2f%19"PRIu64"%21.2f" - "%15.2f%21"PRIu64"%23.2f%16.2f\n", - level, test_data->comp_data_sz, test_data->ratio, - test_data->comp_tsc_duration[level], - test_data->comp_tsc_byte, test_data->comp_gbps, - test_data->decomp_tsc_duration[level], - test_data->decomp_tsc_byte, test_data->decomp_gbps); + cdev_id = enabled_cdevs[cdev_index]; + ctx[i] = cperf_testmap[test_data->test].constructor( + cdev_id, qp_id, + test_data); + if (ctx[i] == NULL) { + RTE_LOG(ERR, USER1, "Test run constructor failed\n"); + goto end; + } + qp_id = (qp_id + 1) % test_data->nb_qps; + if (qp_id == 0) + cdev_index++; + i++; + } + + while (test_data->level <= test_data->level_lst.max) { - if (test_data->level.inc != 0) - level += test_data->level.inc; + i = 0; + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + + if (i == total_nb_qps) + break; + + rte_eal_remote_launch( + 
cperf_testmap[test_data->test].runner, + ctx[i], lcore_id); + i++; + } + i = 0; + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + + if (i == total_nb_qps) + break; + ret |= rte_eal_wait_lcore(lcore_id); + i++; + } + + if (ret != EXIT_SUCCESS) + break; + + if (test_data->level_lst.inc != 0) + test_data->level += test_data->level_lst.inc; else { - if (++level_idx == test_data->level.count) + if (++level_idx == test_data->level_lst.count) break; - level = test_data->level.list[level_idx]; + test_data->level = test_data->level_lst.list[level_idx]; } } end: - switch (cleanup) { + switch (test_data->cleanup) { case ST_DURING_TEST: - case ST_PREPARE_BUF: - free_bufs(test_data); - /* fallthrough */ - case ST_MEMORY_ALLOC: - rte_free(test_data->decomp_bufs); - rte_free(test_data->comp_bufs); - rte_free(test_data->decompressed_data); - rte_free(test_data->compressed_data); - rte_mempool_free(test_data->op_pool); - rte_mempool_free(test_data->decomp_buf_pool); - rte_mempool_free(test_data->comp_buf_pool); + i = 0; + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (i == total_nb_qps) + break; + + if (ctx[i] && cperf_testmap[test_data->test].destructor) + cperf_testmap[test_data->test].destructor( + ctx[i]); + i++; + } /* fallthrough */ case ST_INPUT_DATA: rte_free(test_data->input_data); /* fallthrough */ case ST_COMPDEV: - if (test_data->cdev_id != -1) - rte_compressdev_stop(test_data->cdev_id); + for (i = 0; i < nb_compressdevs && + i < RTE_COMPRESS_MAX_DEVS; i++) + rte_compressdev_stop(enabled_cdevs[i]); /* fallthrough */ case ST_TEST_DATA: rte_free(test_data); @@ -616,3 +440,44 @@ main(int argc, char **argv) } return ret; } + +__rte_weak void * +cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused, + uint16_t qp_id __rte_unused, + struct comp_test_data *options __rte_unused) +{ + RTE_LOG(INFO, USER1, "Benchmark test is not supported yet\n"); + return NULL; +} + +__rte_weak void +cperf_benchmark_test_destructor(void *arg __rte_unused) +{ + +} + +__rte_weak int 
+cperf_benchmark_test_runner(void *test_ctx __rte_unused) +{ + return 0; +} +__rte_weak void * +cperf_verify_test_constructor(uint8_t dev_id __rte_unused, + uint16_t qp_id __rte_unused, + struct comp_test_data *options __rte_unused) +{ + RTE_LOG(INFO, USER1, "Verify test is not supported yet\n"); + return NULL; +} + +__rte_weak void +cperf_verify_test_destructor(void *arg __rte_unused) +{ + +} + +__rte_weak int +cperf_verify_test_runner(void *test_ctx __rte_unused) +{ + return 0; +} diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build index ec73e5e..00413c6 100644 --- a/app/test-compress-perf/meson.build +++ b/app/test-compress-perf/meson.build @@ -4,6 +4,5 @@ allow_experimental_apis = true sources = files('comp_perf_options_parse.c', 'main.c', - 'comp_perf_test_verify.c', - 'comp_perf_test_benchmark.c') + 'comp_perf_test_common.c') deps = ['compressdev'] diff --git a/doc/guides/rel_notes/release_19_08.rst b/doc/guides/rel_notes/release_19_08.rst index 7c0435a..2b2e029 100644 --- a/doc/guides/rel_notes/release_19_08.rst +++ b/doc/guides/rel_notes/release_19_08.rst @@ -99,6 +99,9 @@ New Features Updated ``librte_telemetry`` to fetch the global metrics from the ``librte_metrics`` library. +* **Updated test-compress-perf tool application.** + + Added multi-core support to the compression perf tool application. Removed Items ------------- -- 2.7.4