> On Nov 2, 2020, at 10:21 PM, Honnappa Nagarahalli
> <honnappa.nagaraha...@arm.com> wrote:
>
> <snip>
>>
>> Avoid code duplication by combining single and multi threaded tests
>>
>> Signed-off-by: Dharmik Thakkar <dharmik.thak...@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
>> ---
>> app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
>> 1 file changed, 91 insertions(+), 271 deletions(-)
>>
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>> index 224c92fa3d65..229c835c23f7 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -67,6 +67,12 @@ enum {
>> IP_CLASS_C
>> };
>>
>> +enum {
>> + SINGLE_WRITER = 1,
>> + MULTI_WRITER_1,
>> + MULTI_WRITER_2
>> +};
> Do we need this? Can we use the number of cores instead?
>
There are 3 combinations of writes (adds/deletes):
1. Write all the entries - in case of a single writer
2. Write the first half of the entries - in case of multiple writers
3. Write the remaining half of the entries - in case of multiple writers
The number of cores alone does not tell a writer which of these ranges it
should operate on, so I think this is required.
>> +
>> /* struct route_rule_count defines the total number of rules in following
>> a/b/c
>> * each item in a[]/b[]/c[] is the number of common IP address class A/B/C,
>> not
>> * including the ones for private local network.
>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) {
>> unsigned int i, j, si, ei;
>> uint64_t begin, total_cycles;
>> - uint8_t core_id = (uint8_t)((uintptr_t)arg);
>> + uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>> uint32_t next_hop_add = 0xAA;
>>
>> - /* 2 writer threads are used */
>> - if (core_id % 2 == 0) {
>> + /* Single writer (writer_id = 1) */
>> + if (writer_id == SINGLE_WRITER) {
>> + si = 0;
>> + ei = NUM_LDEPTH_ROUTE_ENTRIES;
>> + }
>> + /* 2 Writers (writer_id = 2/3)*/
>> + else if (writer_id == MULTI_WRITER_1) {
>> si = 0;
>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>> } else {
>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
>> for (i = 0; i < RCU_ITERATIONS; i++) {
>> /* Add all the entries */
>> for (j = si; j < ei; j++) {
>> - pthread_mutex_lock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> + pthread_mutex_lock(&lpm_mutex);
>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> large_ldepth_route_table[j].depth,
>> next_hop_add) != 0) {
>> printf("Failed to add iteration %d, route#
>> %d\n",
>> i, j);
>> - pthread_mutex_unlock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> +
>> pthread_mutex_unlock(&lpm_mutex);
>> return -1;
>> }
>> - pthread_mutex_unlock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> + pthread_mutex_unlock(&lpm_mutex);
>> }
>>
>> /* Delete all the entries */
>> for (j = si; j < ei; j++) {
>> - pthread_mutex_lock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> + pthread_mutex_lock(&lpm_mutex);
>> if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> large_ldepth_route_table[j].depth) != 0) {
>> printf("Failed to delete iteration %d, route#
>> %d\n",
>> i, j);
>> - pthread_mutex_unlock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> +
>> pthread_mutex_unlock(&lpm_mutex);
>> return -1;
>> }
>> - pthread_mutex_unlock(&lpm_mutex);
>> + if (writer_id != SINGLE_WRITER)
>> + pthread_mutex_unlock(&lpm_mutex);
>> }
>> }
>>
>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>
>> /*
>> * Functional test:
>> - * 2 writers, rest are readers
>> + * 1/2 writers, rest are readers
>> */
>> static int
>> -test_lpm_rcu_perf_multi_writer(void)
>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>> {
>> struct rte_lpm_config config;
>> size_t sz;
>> - unsigned int i;
>> + unsigned int i, j;
>> uint16_t core_id;
>> struct rte_lpm_rcu_config rcu_cfg = {0};
>> + int (*reader_f)(void *arg) = NULL;
>>
>> if (rte_lcore_count() < 3) {
>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
>> @@ -504,273 +522,76 @@ test_lpm_rcu_perf_multi_writer(void)
>> num_cores++;
>> }
>>
>> - printf("\nPerf test: 2 writers, %d readers, RCU integration
>> enabled\n",
>> - num_cores - 2);
>> -
>> - /* Create LPM table */
>> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.flags = 0;
>> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> - TEST_LPM_ASSERT(lpm != NULL);
>> -
>> - /* Init RCU variable */
>> - sz = rte_rcu_qsbr_get_memsize(num_cores);
>> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> - RTE_CACHE_LINE_SIZE);
>> - rte_rcu_qsbr_init(rv, num_cores);
>> -
>> - rcu_cfg.v = rv;
>> - /* Assign the RCU variable to LPM */
>> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> - printf("RCU variable assignment failed\n");
>> - goto error;
>> - }
>> -
>> - writer_done = 0;
>> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -
>> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> - /* Launch reader threads */
>> - for (i = 2; i < num_cores; i++)
>> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> - enabled_core_ids[i]);
>> -
>> - /* Launch writer threads */
>> - for (i = 0; i < 2; i++)
>> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> - (void *)(uintptr_t)i,
>> - enabled_core_ids[i]);
>> -
>> - /* Wait for writer threads */
>> - for (i = 0; i < 2; i++)
>> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> - goto error;
>> -
>> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> - printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> - / TOTAL_WRITES);
>> -
>> - writer_done = 1;
>> - /* Wait until all readers have exited */
>> - for (i = 2; i < num_cores; i++)
>> - rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> - rte_lpm_free(lpm);
>> - rte_free(rv);
>> - lpm = NULL;
>> - rv = NULL;
>> -
>> - /* Test without RCU integration */
>> - printf("\nPerf test: 2 writers, %d readers, RCU integration
>> disabled\n",
>> - num_cores - 2);
>> -
>> - /* Create LPM table */
>> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.flags = 0;
>> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> - TEST_LPM_ASSERT(lpm != NULL);
>> -
>> - writer_done = 0;
>> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> - /* Launch reader threads */
>> - for (i = 2; i < num_cores; i++)
>> - rte_eal_remote_launch(test_lpm_reader, NULL,
>> - enabled_core_ids[i]);
>> -
>> - /* Launch writer threads */
>> - for (i = 0; i < 2; i++)
>> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> - (void *)(uintptr_t)i,
>> - enabled_core_ids[i]);
>> -
>> - /* Wait for writer threads */
>> - for (i = 0; i < 2; i++)
>> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> - goto error;
>> -
>> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> - printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> - / TOTAL_WRITES);
>> -
>> - writer_done = 1;
>> - /* Wait until all readers have exited */
>> - for (i = 2; i < num_cores; i++)
>> - rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> - rte_lpm_free(lpm);
>> -
>> - return 0;
>> -
>> -error:
>> - writer_done = 1;
>> - /* Wait until all readers have exited */
>> - rte_eal_mp_wait_lcore();
>> -
>> - rte_lpm_free(lpm);
>> - rte_free(rv);
>> -
>> - return -1;
>> -}
>> -
>> -/*
>> - * Functional test:
>> - * Single writer, rest are readers
>> - */
>> -static int
>> -test_lpm_rcu_perf(void)
>> -{
>> - struct rte_lpm_config config;
>> - uint64_t begin, total_cycles;
>> - size_t sz;
>> - unsigned int i, j;
>> - uint16_t core_id;
>> - uint32_t next_hop_add = 0xAA;
>> - struct rte_lpm_rcu_config rcu_cfg = {0};
>> -
>> - if (rte_lcore_count() < 2) {
>> - printf("Not enough cores for lpm_rcu_perf_autotest,
>> expecting at least 2\n");
>> - return TEST_SKIPPED;
>> - }
>> -
>> - num_cores = 0;
>> - RTE_LCORE_FOREACH_WORKER(core_id) {
>> - enabled_core_ids[num_cores] = core_id;
>> - num_cores++;
>> - }
>> -
>> - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
>> - num_cores);
>> -
>> - /* Create LPM table */
>> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.flags = 0;
>> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> - TEST_LPM_ASSERT(lpm != NULL);
>> -
>> - /* Init RCU variable */
>> - sz = rte_rcu_qsbr_get_memsize(num_cores);
>> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> - RTE_CACHE_LINE_SIZE);
>> - rte_rcu_qsbr_init(rv, num_cores);
>> -
>> - rcu_cfg.v = rv;
>> - /* Assign the RCU variable to LPM */
>> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> - printf("RCU variable assignment failed\n");
>> - goto error;
>> - }
>> -
>> - writer_done = 0;
>> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> - /* Launch reader threads */
>> - for (i = 0; i < num_cores; i++)
>> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> - enabled_core_ids[i]);
>> -
>> - /* Measure add/delete. */
>> - begin = rte_rdtsc_precise();
>> - for (i = 0; i < RCU_ITERATIONS; i++) {
>> - /* Add all the entries */
>> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> - large_ldepth_route_table[j].depth,
>> - next_hop_add) != 0) {
>> - printf("Failed to add iteration %d, route#
>> %d\n",
>> - i, j);
>> + for (j = 1; j < 3; j++) {
>> + if (use_rcu)
>> + printf("\nPerf test: %d writer(s), %d reader(s),"
>> + " RCU integration enabled\n", j, num_cores - j);
>> + else
>> + printf("\nPerf test: %d writer(s), %d reader(s),"
>> + " RCU integration disabled\n", j, num_cores - j);
>> +
>> + /* Create LPM table */
>> + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> + config.flags = 0;
>> + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> + TEST_LPM_ASSERT(lpm != NULL);
>> +
>> + /* Init RCU variable */
>> + if (use_rcu) {
>> + sz = rte_rcu_qsbr_get_memsize(num_cores);
>> + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> +
>> RTE_CACHE_LINE_SIZE);
>> + rte_rcu_qsbr_init(rv, num_cores);
>> +
>> + rcu_cfg.v = rv;
>> + /* Assign the RCU variable to LPM */
>> + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> + printf("RCU variable assignment failed\n");
>> goto error;
>> }
>>
>> - /* Delete all the entries */
>> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> - if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> - large_ldepth_route_table[j].depth) != 0) {
>> - printf("Failed to delete iteration %d, route#
>> %d\n",
>> - i, j);
>> - goto error;
>> - }
>> - }
>> - total_cycles = rte_rdtsc_precise() - begin;
>> + reader_f = test_lpm_rcu_qsbr_reader;
>> + } else
>> + reader_f = test_lpm_reader;
>>
>> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> - printf("Average LPM Add/Del: %g cycles\n",
>> - (double)total_cycles / TOTAL_WRITES);
>> + writer_done = 0;
>> + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>
>> - writer_done = 1;
>> - /* Wait until all readers have exited */
>> - for (i = 0; i < num_cores; i++)
>> - if (rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> - rte_lpm_free(lpm);
>> - rte_free(rv);
>> - lpm = NULL;
>> - rv = NULL;
>> -
>> - /* Test without RCU integration */
>> - printf("\nPerf test: 1 writer, %d readers, RCU integration
>> disabled\n",
>> - num_cores);
>> -
>> - /* Create LPM table */
>> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> - config.flags = 0;
>> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> - TEST_LPM_ASSERT(lpm != NULL);
>> + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>
>> - writer_done = 0;
>> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> + /* Launch reader threads */
>> + for (i = j; i < num_cores; i++)
>> + rte_eal_remote_launch(reader_f, NULL,
>> + enabled_core_ids[i]);
>>
>> - /* Launch reader threads */
>> - for (i = 0; i < num_cores; i++)
>> - rte_eal_remote_launch(test_lpm_reader, NULL,
>> - enabled_core_ids[i]);
>> + /* Launch writer threads */
>> + for (i = 0; i < j; i++)
>> + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> + (void *)(uintptr_t)(i + j),
> This can be just 'j'?
>
>> + enabled_core_ids[i]);
>>
>> - /* Measure add/delete. */
>> - begin = rte_rdtsc_precise();
>> - for (i = 0; i < RCU_ITERATIONS; i++) {
>> - /* Add all the entries */
>> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> - large_ldepth_route_table[j].depth,
>> - next_hop_add) != 0) {
>> - printf("Failed to add iteration %d, route#
>> %d\n",
>> - i, j);
>> + /* Wait for writer threads */
>> + for (i = 0; i < j; i++)
>> + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> goto error;
>> - }
>>
>> - /* Delete all the entries */
>> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> - if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> - large_ldepth_route_table[j].depth) != 0) {
>> - printf("Failed to delete iteration %d, route#
>> %d\n",
>> - i, j);
>> - goto error;
>> - }
>> + printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> + printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> + printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> + __atomic_load_n(&gwrite_cycles,
>> __ATOMIC_RELAXED)
>> + / TOTAL_WRITES);
>> +
>> + writer_done = 1;
>> + /* Wait until all readers have exited */
>> + for (i = j; i < num_cores; i++)
>> + rte_eal_wait_lcore(enabled_core_ids[i]);
>> +
>> + rte_lpm_free(lpm);
>> + rte_free(rv);
>> + lpm = NULL;
>> + rv = NULL;
>> }
>> - total_cycles = rte_rdtsc_precise() - begin;
>> -
>> - printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> - printf("Average LPM Add/Del: %g cycles\n",
>> - (double)total_cycles / TOTAL_WRITES);
>> -
>> - writer_done = 1;
>> - /* Wait until all readers have exited */
>> - for (i = 0; i < num_cores; i++)
>> - rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> - rte_lpm_free(lpm);
>>
>> return 0;
>>
>> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>> rte_lpm_delete_all(lpm);
>> rte_lpm_free(lpm);
>>
>> - test_lpm_rcu_perf();
>> -
>> - test_lpm_rcu_perf_multi_writer();
>> + test_lpm_rcu_perf_multi_writer(0);
>> + test_lpm_rcu_perf_multi_writer(1);
>>
>> return 0;
>> }
>> --
>> 2.17.1