For series:

Reviewed-and-tested-by: Brian Brooks <brian.bro...@linaro.org>

On 09/14 11:53:06, Matias Elo wrote:
> Add new scheduling latency benchmark application. The application
> measures delays (avg, min, max) for high and low priority events.
> 
> The test has a configurable number of TRAFFIC events and a few SAMPLE events
> (one common or one per priority). The scheduling latency is only measured
> from the SAMPLE events to minimize measurement overhead.
> 
> The application's command line arguments enable configuring:
> - Number of processing threads
> - Number of high/low priority queues
> - Number of high/low priority events
> - Use separate SAMPLE events for each priority
> - Scheduled queue type (PARALLEL, ATOMIC, ORDERED)
> 
> Signed-off-by: Matias Elo <matias....@nokia.com>
> ---
> 
> V2:
> - Remove unnecessary 'num_workers' initialization (Maxim)
> 
>  test/common_plat/performance/.gitignore          |   1 +
>  test/common_plat/performance/Makefile.am         |   4 +
>  test/common_plat/performance/odp_sched_latency.c | 767 
> +++++++++++++++++++++++
>  3 files changed, 772 insertions(+)
>  create mode 100644 test/common_plat/performance/odp_sched_latency.c
> 
> diff --git a/test/common_plat/performance/.gitignore 
> b/test/common_plat/performance/.gitignore
> index edcc832..1527d25 100644
> --- a/test/common_plat/performance/.gitignore
> +++ b/test/common_plat/performance/.gitignore
> @@ -4,4 +4,5 @@ odp_atomic
>  odp_crypto
>  odp_l2fwd
>  odp_pktio_perf
> +odp_sched_latency
>  odp_scheduling
> diff --git a/test/common_plat/performance/Makefile.am 
> b/test/common_plat/performance/Makefile.am
> index d23bb3e..f5dd8dd 100644
> --- a/test/common_plat/performance/Makefile.am
> +++ b/test/common_plat/performance/Makefile.am
> @@ -5,6 +5,7 @@ TESTS_ENVIRONMENT += TEST_DIR=${builddir}
>  EXECUTABLES = odp_crypto$(EXEEXT) odp_pktio_perf$(EXEEXT)
>  
>  COMPILE_ONLY = odp_l2fwd$(EXEEXT) \
> +            odp_sched_latency$(EXEEXT) \
>              odp_scheduling$(EXEEXT)
>  
>  TESTSCRIPTS = odp_l2fwd_run.sh \
> @@ -20,6 +21,8 @@ bin_PROGRAMS = $(EXECUTABLES) $(COMPILE_ONLY)
>  
>  odp_crypto_LDFLAGS = $(AM_LDFLAGS) -static
>  odp_crypto_CFLAGS = $(AM_CFLAGS) -I${top_srcdir}/test
> +odp_sched_latency_LDFLAGS = $(AM_LDFLAGS) -static
> +odp_sched_latency_CFLAGS = $(AM_CFLAGS) -I${top_srcdir}/test
>  odp_scheduling_LDFLAGS = $(AM_LDFLAGS) -static
>  odp_scheduling_CFLAGS = $(AM_CFLAGS) -I${top_srcdir}/test
>  
> @@ -27,6 +30,7 @@ noinst_HEADERS = \
>                 $(top_srcdir)/test/test_debug.h
>  
>  dist_odp_crypto_SOURCES = odp_crypto.c
> +dist_odp_sched_latency_SOURCES = odp_sched_latency.c
>  dist_odp_scheduling_SOURCES = odp_scheduling.c
>  dist_odp_pktio_perf_SOURCES = odp_pktio_perf.c
>  
> diff --git a/test/common_plat/performance/odp_sched_latency.c 
> b/test/common_plat/performance/odp_sched_latency.c
> new file mode 100644
> index 0000000..063fb21
> --- /dev/null
> +++ b/test/common_plat/performance/odp_sched_latency.c
> @@ -0,0 +1,767 @@
> +/* Copyright (c) 2016, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:     BSD-3-Clause
> + */
> +
> +/**
> + * @file
> + *
> + * @example odp_sched_latency.c  ODP scheduling latency benchmark application
> + */
> +
> +#include <string.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +
> +#include <test_debug.h>
> +
> +/* ODP main header */
> +#include <odp_api.h>
> +
> +/* ODP helper for Linux apps */
> +#include <odp/helper/linux.h>
> +
> +/* GNU lib C */
> +#include <getopt.h>
> +
> +#define MAX_WORKERS    64            /**< Maximum number of worker threads */
> +#define MAX_QUEUES     4096          /**< Maximum number of queues */
> +#define EVENT_POOL_SIZE        (1024 * 1024) /**< Event pool size */
> +#define TEST_ROUNDS (4 * 1024 * 1024)        /**< Test rounds for each 
> thread */
> +#define MAIN_THREAD     1 /**< Thread ID performing maintenance tasks */
> +
> +/* Default values for command line arguments */
> +#define SAMPLE_EVENT_PER_PRIO          0 /**< Allocate a separate sample 
> event for
> +                                      each priority */
> +#define HI_PRIO_EVENTS                 0 /**< Number of high priority events 
> */
> +#define LO_PRIO_EVENTS                32 /**< Number of low priority events 
> */
> +#define HI_PRIO_QUEUES                16 /**< Number of high priority queues 
> */
> +#define LO_PRIO_QUEUES                64 /**< Number of low priority queues 
> */
> +
> +#define EVENTS_PER_HI_PRIO_QUEUE 0  /**< Alloc HI_PRIO_QUEUES x 
> HI_PRIO_EVENTS
> +                                      events */
> +#define EVENTS_PER_LO_PRIO_QUEUE 1  /**< Alloc LO_PRIO_QUEUES x 
> LO_PRIO_EVENTS
> +                                      events */
> +ODP_STATIC_ASSERT(HI_PRIO_QUEUES <= MAX_QUEUES, "Too many HI priority 
> queues");
> +ODP_STATIC_ASSERT(LO_PRIO_QUEUES <= MAX_QUEUES, "Too many LO priority 
> queues");
> +
> +#define CACHE_ALIGN_ROUNDUP(x)\
> +     ((ODP_CACHE_LINE_SIZE) * \
> +      (((x) + ODP_CACHE_LINE_SIZE - 1) / (ODP_CACHE_LINE_SIZE)))
> +
> +/* Test priorities */
> +#define NUM_PRIOS 2 /**< Number of tested priorities */
> +#define HI_PRIO        0
> +#define LO_PRIO        1
> +
> +/** Test event types */
> +typedef enum {
> +     WARM_UP, /**< Warm up event */
> +     TRAFFIC, /**< Event used only as traffic load */
> +     SAMPLE   /**< Event used to measure latency */
> +} event_type_t;
> +
> +/** Test event */
> +typedef struct {
> +     uint64_t ts;            /**< Send timestamp */
> +     event_type_t type;      /**< Message type */
> +     int src_idx[NUM_PRIOS]; /**< Source ODP queue */
> +     int prio;               /**< Source queue priority */
> +} test_event_t;
> +
> +/** Test arguments */
> +typedef struct {
> +     int cpu_count;                  /**< CPU count */
> +     odp_schedule_sync_t sync_type;  /**< Scheduler sync type */
> +     struct {
> +             int queues;     /**< Number of scheduling queues */
> +             int events;     /**< Number of events */
> +             odp_bool_t events_per_queue; /**< Allocate 'queues' x 'events'
> +                                               test events */
> +     } prio[NUM_PRIOS];
> +     odp_bool_t sample_per_prio; /**< Allocate a separate sample event for
> +                                      each priority */
> +} test_args_t;
> +
> +/** Latency measurements statistics */
> +typedef struct {
> +     uint64_t events;   /**< Total number of received events */
> +     uint64_t sample_events;  /**< Number of received sample events */
> +     uint64_t tot;      /**< Total event latency. Sum of all events. */
> +     uint64_t min;      /**< Minimum event latency */
> +     uint64_t max;      /**< Maximum event latency */
> +} test_stat_t;
> +
> +/** Performance test statistics (per core) */
> +typedef union {
> +     test_stat_t prio[NUM_PRIOS]; /**< Test statistics per priority */
> +
> +     uint8_t pad[CACHE_ALIGN_ROUNDUP(NUM_PRIOS * sizeof(test_stat_t))];
> +} core_stat_t ODP_ALIGNED_CACHE;
> +
> +/** Test global variables */
> +typedef struct {
> +     core_stat_t      core_stat[MAX_WORKERS]; /**< Core specific stats */
> +     odp_barrier_t    barrier; /**< Barrier for thread synchronization */
> +     odp_pool_t       pool;    /**< Pool for allocating test events */
> +     test_args_t      args;    /**< Parsed command line arguments */
> +     odp_queue_t      queue[NUM_PRIOS][MAX_QUEUES]; /**< Scheduled queues */
> +} test_globals_t;
> +
> +/**
> + * Clear all scheduled queues.
> + *
> + * Retry to be sure that all buffers have been scheduled.
> + */
> +static void clear_sched_queues(void)
> +{
> +     odp_event_t ev;
> +
> +     while (1) {
> +             ev = odp_schedule(NULL, ODP_SCHED_NO_WAIT);
> +
> +             if (ev == ODP_EVENT_INVALID)
> +                     break;
> +
> +             odp_event_free(ev);
> +     }
> +}
> +
> +/**
> + * Enqueue events into queues
> + *
> + * @param prio        Queue priority (HI_PRIO/LO_PRIO)
> + * @param num_queues  Number of queues
> + * @param num_events  Number of 'TRAFFIC' events
> + * @param num_samples Number of 'SAMPLE' events
> + * @param div_events  If true, divide 'num_events' between 'num_queues'. if
> + *                 false, enqueue 'num_events' to each queue.
> + * @param globals     Test shared data
> + *
> + * @retval 0 on success
> + * @retval -1 on failure
> + */
> +static int enqueue_events(int prio, int num_queues, int num_events,
> +                       int num_samples, odp_bool_t div_events,
> +                       test_globals_t *globals)
> +{
> +     odp_buffer_t buf[num_events + num_samples];
> +     odp_event_t ev[num_events + num_samples];
> +     odp_queue_t queue;
> +     test_event_t *event;
> +     int i, j, ret;
> +     int enq_events;
> +     int events_per_queue;
> +     int tot_events;
> +     int rdy_events = 0;
> +
> +     tot_events = num_events + num_samples;
> +
> +     if (!num_queues || !tot_events)
> +             return 0;
> +
> +     events_per_queue = tot_events;
> +     if (div_events)
> +             events_per_queue = (tot_events + num_queues - 1) / num_queues;
> +
> +     for (i = 0; i < num_queues; i++) {
> +             queue = globals->queue[prio][i];
> +
> +             ret = odp_buffer_alloc_multi(globals->pool, buf,
> +                                          events_per_queue);
> +             if (ret != events_per_queue) {
> +                     LOG_ERR("Buffer alloc failed. Try increasing 
> EVENT_POOL_SIZE.\n");
> +                     ret = ret < 0 ? 0 : ret;
> +                     odp_buffer_free_multi(buf, ret);
> +                     return -1;
> +             }
> +             for (j = 0; j < events_per_queue; j++) {
> +                     if (!odp_buffer_is_valid(buf[j])) {
> +                             LOG_ERR("Buffer alloc failed\n");
> +                             odp_buffer_free_multi(buf, events_per_queue);
> +                             return -1;
> +                     }
> +
> +                     event = odp_buffer_addr(buf[j]);
> +                     memset(event, 0, sizeof(test_event_t));
> +
> +                     /* Latency isn't measured from the first processing
> +                      * round. */
> +                     if (num_samples > 0) {
> +                             event->type = WARM_UP;
> +                             num_samples--;
> +                     } else {
> +                             event->type = TRAFFIC;
> +                     }
> +                     event->src_idx[prio] = i;
> +                     event->prio = prio;
> +                     ev[j] = odp_buffer_to_event(buf[j]);
> +             }
> +
> +             enq_events = 0;
> +             do {
> +                     ret = odp_queue_enq_multi(queue, &ev[enq_events],
> +                                               events_per_queue -
> +                                               enq_events);
> +                     if (ret < 0) {
> +                             LOG_ERR("Queue enqueue failed.\n");
> +                             return -1;
> +                     }
> +                     enq_events += ret;
> +             } while (enq_events < events_per_queue);
> +
> +             rdy_events += events_per_queue;
> +             if (div_events && rdy_events >= tot_events)
> +                     return 0;
> +     }
> +     return 0;
> +}
> +
> +/**
> + * Print latency measurement results
> + *
> + * @param globals  Test shared data
> + */
> +static void print_results(test_globals_t *globals)
> +{
> +     test_stat_t *lat;
> +     odp_schedule_sync_t stype;
> +     test_stat_t total;
> +     test_args_t *args;
> +     uint64_t avg;
> +     int i, j;
> +
> +     args = &globals->args;
> +     stype = globals->args.sync_type;
> +
> +     printf("\n%s queue scheduling latency\n",
> +            (stype == ODP_SCHED_SYNC_ATOMIC) ? "ATOMIC" :
> +            ((stype == ODP_SCHED_SYNC_ORDERED) ? "ORDERED" : "PARALLEL"));
> +
> +     printf("  LO_PRIO queues: %i\n", args->prio[LO_PRIO].queues);
> +     if (args->prio[LO_PRIO].events_per_queue)
> +             printf("  LO_PRIO event per queue: %i\n",
> +                    args->prio[LO_PRIO].events);
> +     else
> +             printf("  LO_PRIO events: %i\n", args->prio[LO_PRIO].events);
> +
> +     printf("  HI_PRIO queues: %i\n", args->prio[HI_PRIO].queues);
> +     if (args->prio[HI_PRIO].events_per_queue)
> +             printf("  HI_PRIO event per queue: %i\n\n",
> +                    args->prio[HI_PRIO].events);
> +     else
> +             printf("  HI_PRIO events: %i\n\n", args->prio[HI_PRIO].events);
> +
> +     for (i = 0; i < NUM_PRIOS; i++) {
> +             memset(&total, 0, sizeof(test_stat_t));
> +             total.min = UINT64_MAX;
> +
> +             printf("%s priority\n"
> +                    "Thread   Avg[ns]    Min[ns]    Max[ns]    Samples    
> Total\n"
> +                    
> "---------------------------------------------------------------\n",
> +                    i == HI_PRIO ? "HIGH" : "LOW");
> +             for (j = 1; j <= args->cpu_count; j++) {
> +                     lat = &globals->core_stat[j].prio[i];
> +
> +                     if (lat->sample_events == 0) {
> +                             printf("%-8d N/A\n", j);
> +                             continue;
> +                     }
> +
> +                     if (lat->max > total.max)
> +                             total.max = lat->max;
> +                     if (lat->min < total.min)
> +                             total.min = lat->min;
> +                     total.tot += lat->tot;
> +                     total.sample_events += lat->sample_events;
> +                     total.events += lat->events;
> +
> +                     avg = lat->events ? lat->tot / lat->sample_events : 0;
> +                     printf("%-8d %-10" PRIu64 " %-10" PRIu64 " "
> +                            "%-10" PRIu64 " %-10" PRIu64 " %-10" PRIu64 "\n",
> +                            j, avg, lat->min, lat->max, lat->sample_events,
> +                            lat->events);
> +             }
> +             
> printf("---------------------------------------------------------------\n");
> +             if (total.sample_events == 0) {
> +                     printf("Total    N/A\n\n");
> +                     continue;
> +             }
> +             avg = total.events ? total.tot / total.sample_events : 0;
> +             printf("Total    %-10" PRIu64 " %-10" PRIu64 " %-10" PRIu64 " "
> +                    "%-10" PRIu64 " %-10" PRIu64 "\n\n", avg, total.min,
> +                    total.max, total.sample_events, total.events);
> +     }
> +}
> +
> +/**
> + * Measure latency of scheduled ODP events
> + *
> + * Schedule and enqueue events until 'TEST_ROUNDS' events have been 
> processed.
> + * Scheduling latency is measured only from type 'SAMPLE' events. Other 
> events
> + * are simply enqueued back to the scheduling queues.
> + *
> + * For 'TRAFFIC' type events the destination queue is selected from the same
> + * priority class as source queue. 'SAMPLE' type event may change priority
> + * depending on the command line arguments.
> + *
> + * @param thr      Thread ID
> + * @param globals  Test shared data
> + *
> + * @retval 0 on success
> + * @retval -1 on failure
> + */
> +static int test_schedule(int thr, test_globals_t *globals)
> +{
> +     odp_event_t ev;
> +     odp_buffer_t buf;
> +     odp_queue_t src_queue;
> +     odp_queue_t dst_queue;
> +     uint64_t latency;
> +     uint32_t i;
> +     test_event_t *event;
> +     test_stat_t *stats;
> +     int dst_idx;
> +
> +     memset(&globals->core_stat[thr], 0, sizeof(core_stat_t));
> +     globals->core_stat[thr].prio[HI_PRIO].min = UINT64_MAX;
> +     globals->core_stat[thr].prio[LO_PRIO].min = UINT64_MAX;
> +
> +     for (i = 0; i < TEST_ROUNDS; i++) {
> +             ev = odp_schedule(&src_queue, ODP_SCHED_WAIT);
> +
> +             buf = odp_buffer_from_event(ev);
> +             event = odp_buffer_addr(buf);
> +
> +             stats = &globals->core_stat[thr].prio[event->prio];
> +
> +             if (event->type == SAMPLE) {
> +                     latency = odp_time_to_ns(odp_time_global()) - event->ts;
> +
> +                     if (latency > stats->max)
> +                             stats->max = latency;
> +                     if (latency < stats->min)
> +                             stats->min = latency;
> +                     stats->tot += latency;
> +                     stats->sample_events++;
> +
> +                     /* Move sample event to a different priority */
> +                     if (!globals->args.sample_per_prio &&
> +                         globals->args.prio[!event->prio].queues)
> +                             event->prio = !event->prio;
> +             }
> +
> +             if (odp_unlikely(event->type == WARM_UP))
> +                     event->type = SAMPLE;
> +             else
> +                     stats->events++;
> +
> +             /* Move event to next queue */
> +             dst_idx = event->src_idx[event->prio] + 1;
> +             if (dst_idx >= globals->args.prio[event->prio].queues)
> +                     dst_idx = 0;
> +             event->src_idx[event->prio] = dst_idx;
> +             dst_queue = globals->queue[event->prio][dst_idx];
> +
> +             if (event->type == SAMPLE)
> +                     event->ts = odp_time_to_ns(odp_time_global());
> +
> +             if (odp_queue_enq(dst_queue, ev)) {
> +                     LOG_ERR("[%i] Queue enqueue failed.\n", thr);
> +                     odp_event_free(ev);
> +                     return -1;
> +             }
> +     }
> +
> +     /* Clear possible locally stored buffers */
> +     odp_schedule_pause();
> +
> +     while (1) {
> +             ev = odp_schedule(&src_queue, ODP_SCHED_NO_WAIT);
> +
> +             if (ev == ODP_EVENT_INVALID)
> +                     break;
> +
> +             if (odp_queue_enq(src_queue, ev)) {
> +                     LOG_ERR("[%i] Queue enqueue failed.\n", thr);
> +                     odp_event_free(ev);
> +                     return -1;
> +             }
> +     }
> +
> +     odp_schedule_resume();
> +
> +     odp_barrier_wait(&globals->barrier);
> +
> +     clear_sched_queues();
> +
> +     if (thr == MAIN_THREAD)
> +             print_results(globals);
> +
> +     return 0;
> +}
> +
> +/**
> + * Worker thread
> + *
> + * @param arg  Arguments
> + *
> + * @retval 0 on success
> + * @retval -1 on failure
> + */
> +static int run_thread(void *arg ODP_UNUSED)
> +{
> +     odp_shm_t shm;
> +     test_globals_t *globals;
> +     test_args_t *args;
> +     int thr;
> +     int sample_events = 0;
> +
> +     thr = odp_thread_id();
> +
> +     shm     = odp_shm_lookup("test_globals");
> +     globals = odp_shm_addr(shm);
> +
> +     if (globals == NULL) {
> +             LOG_ERR("Shared mem lookup failed\n");
> +             return -1;
> +     }
> +
> +     if (thr == MAIN_THREAD) {
> +             args = &globals->args;
> +
> +             if (enqueue_events(HI_PRIO, args->prio[HI_PRIO].queues,
> +                                args->prio[HI_PRIO].events, 1,
> +                                !args->prio[HI_PRIO].events_per_queue,
> +                                globals))
> +                     return -1;
> +
> +             if (!args->prio[HI_PRIO].queues || args->sample_per_prio)
> +                     sample_events = 1;
> +
> +             if (enqueue_events(LO_PRIO, args->prio[LO_PRIO].queues,
> +                                args->prio[LO_PRIO].events, sample_events,
> +                                !args->prio[LO_PRIO].events_per_queue,
> +                                globals))
> +                     return -1;
> +     }
> +
> +     odp_barrier_wait(&globals->barrier);
> +
> +     if (test_schedule(thr, globals))
> +             return -1;
> +
> +     return 0;
> +}
> +
> +/**
> + * Print usage information
> + */
> +static void usage(void)
> +{
> +     printf("\n"
> +            "OpenDataPlane scheduler latency benchmark application.\n"
> +            "\n"
> +            "Usage: ./odp_sched_latency [options]\n"
> +            "Optional OPTIONS:\n"
> +            "  -c, --count <number> CPU count\n"
> +            "  -l, --lo-prio-queues <number> Number of low priority 
> scheduled queues\n"
> +            "  -t, --hi-prio-queues <number> Number of high priority 
> scheduled queues\n"
> +            "  -m, --lo-prio-events-per-queue <number> Number of events per 
> low priority queue\n"
> +            "  -n, --hi-prio-events-per-queue <number> Number of events per 
> high priority queues\n"
> +            "  -o, --lo-prio-events <number> Total number of low priority 
> events (overrides the\n"
> +            "                                number of events per queue)\n"
> +            "  -p, --hi-prio-events <number> Total number of high priority 
> events (overrides the\n"
> +            "                                number of events per queue)\n"
> +            "  -r  --sample-per-prio Allocate a separate sample event for 
> each priority. By default\n"
> +            "                        a single sample event is used and its 
> priority is changed after\n"
> +            "                        each processing round.\n"
> +            "  -s, --sync  Scheduled queues' sync type\n"
> +            "               0: ODP_SCHED_SYNC_PARALLEL (default)\n"
> +            "               1: ODP_SCHED_SYNC_ATOMIC\n"
> +            "               2: ODP_SCHED_SYNC_ORDERED\n"
> +            "  -h, --help   Display help and exit.\n\n"
> +            );
> +}
> +
> +/**
> + * Parse arguments
> + *
> + * @param argc  Argument count
> + * @param argv  Argument vector
> + * @param args  Test arguments
> + */
> +static void parse_args(int argc, char *argv[], test_args_t *args)
> +{
> +     int opt;
> +     int long_index;
> +     int i;
> +
> +     static const struct option longopts[] = {
> +             {"count", required_argument, NULL, 'c'},
> +             {"lo-prio-queues", required_argument, NULL, 'l'},
> +             {"hi-prio-queues", required_argument, NULL, 't'},
> +             {"lo-prio-events-per-queue", required_argument, NULL, 'm'},
> +             {"hi-prio-events-per-queue", required_argument, NULL, 'n'},
> +             {"lo-prio-events", required_argument, NULL, 'o'},
> +             {"hi-prio-events", required_argument, NULL, 'p'},
> +             {"sample-per-prio", no_argument, NULL, 'r'},
> +             {"sync", required_argument, NULL, 's'},
> +             {"help", no_argument, NULL, 'h'},
> +             {NULL, 0, NULL, 0}
> +     };
> +
> +     static const char *shortopts = "+c:s:l:t:m:n:o:p:rh";
> +
> +     /* Let helper collect its own arguments (e.g. --odph_proc) */
> +     odph_parse_options(argc, argv, shortopts, longopts);
> +
> +     args->sync_type = ODP_SCHED_SYNC_PARALLEL;
> +     args->sample_per_prio = SAMPLE_EVENT_PER_PRIO;
> +     args->prio[LO_PRIO].queues = LO_PRIO_QUEUES;
> +     args->prio[HI_PRIO].queues = HI_PRIO_QUEUES;
> +     args->prio[LO_PRIO].events = LO_PRIO_EVENTS;
> +     args->prio[HI_PRIO].events = HI_PRIO_EVENTS;
> +     args->prio[LO_PRIO].events_per_queue = EVENTS_PER_LO_PRIO_QUEUE;
> +     args->prio[HI_PRIO].events_per_queue = EVENTS_PER_HI_PRIO_QUEUE;
> +
> +     opterr = 0; /* Do not issue errors on helper options */
> +     while (1) {
> +             opt = getopt_long(argc, argv, shortopts, longopts, &long_index);
> +
> +             if (opt == -1)
> +                     break;  /* No more options */
> +
> +             switch (opt) {
> +             case 'c':
> +                     args->cpu_count = atoi(optarg);
> +                     break;
> +             case 'l':
> +                     args->prio[LO_PRIO].queues = atoi(optarg);
> +                     break;
> +             case 't':
> +                     args->prio[HI_PRIO].queues = atoi(optarg);
> +                     break;
> +             case 'm':
> +                     args->prio[LO_PRIO].events = atoi(optarg);
> +                     args->prio[LO_PRIO].events_per_queue = 1;
> +                     break;
> +             case 'n':
> +                     args->prio[HI_PRIO].events = atoi(optarg);
> +                     args->prio[HI_PRIO].events_per_queue = 1;
> +                     break;
> +             case 'o':
> +                     args->prio[LO_PRIO].events = atoi(optarg);
> +                     args->prio[LO_PRIO].events_per_queue = 0;
> +                     break;
> +             case 'p':
> +                     args->prio[HI_PRIO].events = atoi(optarg);
> +                     args->prio[HI_PRIO].events_per_queue = 0;
> +                     break;
> +             case 's':
> +                     i = atoi(optarg);
> +                     if (i == 1)
> +                             args->sync_type = ODP_SCHED_SYNC_ATOMIC;
> +                     else if (i == 2)
> +                             args->sync_type = ODP_SCHED_SYNC_ORDERED;
> +                     else
> +                             args->sync_type = ODP_SCHED_SYNC_PARALLEL;
> +                     break;
> +             case 'r':
> +                     args->sample_per_prio = 1;
> +                     break;
> +             case 'h':
> +                     usage();
> +                     exit(EXIT_SUCCESS);
> +                     break;
> +
> +             default:
> +                     break;
> +             }
> +     }
> +
> +     /* Make sure arguments are valid */
> +     if (args->cpu_count > MAX_WORKERS)
> +             args->cpu_count = MAX_WORKERS;
> +     if (args->prio[LO_PRIO].queues > MAX_QUEUES)
> +             args->prio[LO_PRIO].queues = MAX_QUEUES;
> +     if (args->prio[HI_PRIO].queues > MAX_QUEUES)
> +             args->prio[HI_PRIO].queues = MAX_QUEUES;
> +     if (!args->prio[HI_PRIO].queues && !args->prio[LO_PRIO].queues) {
> +             printf("No queues configured\n");
> +             usage();
> +             exit(EXIT_FAILURE);
> +     }
> +}
> +
> +/**
> + * Test main function
> + */
> +int main(int argc, char *argv[])
> +{
> +     odp_instance_t instance;
> +     odph_odpthread_t *thread_tbl;
> +     odph_odpthread_params_t thr_params;
> +     odp_cpumask_t cpumask;
> +     odp_pool_t pool;
> +     odp_pool_param_t params;
> +     odp_shm_t shm;
> +     test_globals_t *globals;
> +     test_args_t args;
> +     char cpumaskstr[ODP_CPUMASK_STR_SIZE];
> +     int i, j;
> +     int ret = 0;
> +     int num_workers = 0;
> +
> +     printf("\nODP scheduling latency benchmark starts\n\n");
> +
> +     memset(&args, 0, sizeof(args));
> +     parse_args(argc, argv, &args);
> +
> +     /* ODP global init */
> +     if (odp_init_global(&instance, NULL, NULL)) {
> +             LOG_ERR("ODP global init failed.\n");
> +             return -1;
> +     }
> +
> +     /*
> +      * Init this thread. It makes also ODP calls when
> +      * setting up resources for worker threads.
> +      */
> +     if (odp_init_local(instance, ODP_THREAD_CONTROL)) {
> +             LOG_ERR("ODP global init failed.\n");
> +             return -1;
> +     }
> +
> +     printf("\n");
> +     printf("ODP system info\n");
> +     printf("---------------\n");
> +     printf("ODP API version:  %s\n",        odp_version_api_str());
> +     printf("ODP impl name:    %s\n",        odp_version_impl_name());
> +     printf("ODP impl details: %s\n",        odp_version_impl_str());
> +     printf("CPU model:        %s\n",        odp_cpu_model_str());
> +     printf("CPU freq (hz):    %" PRIu64 "\n", odp_cpu_hz_max());
> +     printf("Cache line size:  %i\n",        odp_sys_cache_line_size());
> +     printf("Max CPU count:    %i\n",        odp_cpu_count());
> +
> +     /* Get default worker cpumask */
> +     if (args.cpu_count)
> +             num_workers = args.cpu_count;
> +
> +     num_workers = odp_cpumask_default_worker(&cpumask, num_workers);
> +     args.cpu_count = num_workers;
> +
> +     (void)odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr));
> +
> +     printf("Worker threads:   %i\n", num_workers);
> +     printf("First CPU:        %i\n", odp_cpumask_first(&cpumask));
> +     printf("CPU mask:         %s\n\n", cpumaskstr);
> +
> +     thread_tbl = calloc(sizeof(odph_odpthread_t), num_workers);
> +     if (!thread_tbl) {
> +             LOG_ERR("no memory for thread_tbl\n");
> +             return -1;
> +     }
> +
> +     shm = odp_shm_reserve("test_globals",
> +                           sizeof(test_globals_t), ODP_CACHE_LINE_SIZE, 0);
> +     if (shm == ODP_SHM_INVALID) {
> +             LOG_ERR("Shared memory reserve failed.\n");
> +             return -1;
> +     }
> +
> +     globals = odp_shm_addr(shm);
> +     memset(globals, 0, sizeof(test_globals_t));
> +     memcpy(&globals->args, &args, sizeof(test_args_t));
> +
> +     /*
> +      * Create event pool
> +      */
> +     odp_pool_param_init(&params);
> +     params.buf.size  = sizeof(test_event_t);
> +     params.buf.align = 0;
> +     params.buf.num   = EVENT_POOL_SIZE;
> +     params.type      = ODP_POOL_BUFFER;
> +
> +     pool = odp_pool_create("event_pool", &params);
> +
> +     if (pool == ODP_POOL_INVALID) {
> +             LOG_ERR("Pool create failed.\n");
> +             return -1;
> +     }
> +     globals->pool = pool;
> +
> +     /*
> +      * Create queues for schedule test
> +      */
> +     for (i = 0; i < NUM_PRIOS; i++) {
> +             char name[] = "sched_XX_YY";
> +             odp_queue_t queue;
> +             odp_queue_param_t param;
> +             int prio;
> +
> +             if (i == HI_PRIO)
> +                     prio = ODP_SCHED_PRIO_HIGHEST;
> +             else
> +                     prio = ODP_SCHED_PRIO_LOWEST;
> +
> +             name[6] = '0' + (prio / 10);
> +             name[7] = '0' + prio - (10 * (prio / 10));
> +
> +             odp_queue_param_init(&param);
> +             param.type        = ODP_QUEUE_TYPE_SCHED;
> +             param.sched.prio  = prio;
> +             param.sched.sync  = args.sync_type;
> +             param.sched.group = ODP_SCHED_GROUP_ALL;
> +
> +             for (j = 0; j < args.prio[i].queues; j++) {
> +                     name[9]  = '0' + j / 10;
> +                     name[10] = '0' + j - 10 * (j / 10);
> +
> +                     queue = odp_queue_create(name, &param);
> +
> +                     if (queue == ODP_QUEUE_INVALID) {
> +                             LOG_ERR("Scheduled queue create failed.\n");
> +                             return -1;
> +                     }
> +
> +                     globals->queue[i][j] = queue;
> +             }
> +     }
> +
> +     odp_barrier_init(&globals->barrier, num_workers);
> +
> +     /* Create and launch worker threads */
> +     memset(&thr_params, 0, sizeof(thr_params));
> +     thr_params.thr_type = ODP_THREAD_WORKER;
> +     thr_params.instance = instance;
> +     thr_params.start = run_thread;
> +     thr_params.arg   = NULL;
> +     odph_odpthreads_create(thread_tbl, &cpumask, &thr_params);
> +
> +     /* Wait for worker threads to terminate */
> +     odph_odpthreads_join(thread_tbl);
> +     free(thread_tbl);
> +
> +     printf("ODP scheduling latency test complete\n\n");
> +
> +     for (i = 0; i < NUM_PRIOS; i++) {
> +             odp_queue_t queue;
> +             int num_queues;
> +
> +             num_queues = args.prio[i].queues;
> +
> +             for (j = 0; j < num_queues; j++) {
> +                     queue = globals->queue[i][j];
> +                     ret += odp_queue_destroy(queue);
> +             }
> +     }
> +
> +     ret += odp_shm_free(shm);
> +     ret += odp_pool_destroy(pool);
> +     ret += odp_term_local();
> +     ret += odp_term_global(instance);
> +
> +     return ret;
> +}
> -- 
> 2.7.4
> 

Reply via email to