Re: [lng-odp] [API-NEXT PATCH v9 5/6] linux-gen: sched scalable: add scalable scheduler

Savolainen, Petri (Nokia - FI/Espoo) Tue, 20 Jun 2017 06:59:02 -0700

> --- a/platform/linux-generic/include/odp_config_internal.h
> +++ b/platform/linux-generic/include/odp_config_internal.h
> @@ -7,9 +7,7 @@
>  #ifndef ODP_CONFIG_INTERNAL_H_
>  #define ODP_CONFIG_INTERNAL_H_
> 
> -#ifdef __cplusplus
> -extern "C" {
> -#endif
> +#include <odp_schedule_scalable_config.h>


Why do these configs need global visibility? This file should contain general 
configuration options, not scheduler-specific ones.

> 
>  /*
>   * Maximum number of pools
> @@ -22,6 +20,13 @@ extern "C" {
>  #define ODP_CONFIG_QUEUES 1024
> 
>  /*
> + * Maximum queue depth. Maximum number of elements that can be stored in
> a
> + * queue. This value is used only when the size is not explicitly
> provided
> + * during queue creation.
> + */
> +#define CONFIG_QUEUE_SIZE 4096
> +
> +/*
>   * Maximum number of ordered locks per queue
>   */
>  #define CONFIG_QUEUE_MAX_ORD_LOCKS 4
> @@ -120,7 +125,7 @@ extern "C" {
>   *
>   * This the the number of separate SHM areas that can be reserved
> concurrently
>   */
> -#define ODPDRV_CONFIG_SHM_BLOCKS 48
> +#define ODPDRV_CONFIG_SHM_BLOCKS ODP_CONFIG_SHM_BLOCKS


Is this change necessary? It seems to increase driver memory usage for no reason.


> +
> +#endif  /* ODP_SCHEDULE_SCALABLE_H */
> diff --git a/platform/linux-generic/include/odp_schedule_scalable_config.h
> b/platform/linux-generic/include/odp_schedule_scalable_config.h
> new file mode 100644
> index 00000000..febf379b
> --- /dev/null
> +++ b/platform/linux-generic/include/odp_schedule_scalable_config.h
> @@ -0,0 +1,55 @@
> +/* Copyright (c) 2017, ARM Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:     BSD-3-Clause
> + */
> +
> +#ifndef ODP_SCHEDULE_SCALABLE_CONFIG_H_
> +#define ODP_SCHEDULE_SCALABLE_CONFIG_H_
> +
> +/*
> + * Default scaling factor for the scheduler group
> + *
> + * This scaling factor is used when the application creates a scheduler
> + * group with no worker threads.
> + */
> +#define CONFIG_DEFAULT_XFACTOR 4
> +
> +/*
> + * Default weight (in events) for WRR in scalable scheduler
> + *
> + * This controls the per-queue weight for WRR between queues of the same
> + * priority in the scalable scheduler
> + * A higher value improves throughput while a lower value increases
> fairness
> + * and thus likely decreases latency
> + *
> + * If WRR is undesired, set the value to ~0 which will use the largest
> possible
> + * weight
> + *
> + * Note: an API for specifying this on a per-queue basis would be useful
> but is
> + * not yet available
> + */
> +#define CONFIG_WRR_WEIGHT 64
> +
> +/*
> + * Split queue producer/consumer metadata into separate cache lines.
> + * This is beneficial on e.g. Cortex-A57 but not so much on A53.
> + */
> +#define CONFIG_SPLIT_PRODCONS
> +
> +/*
> + * Use locks to protect queue (ring buffer) and scheduler state updates
> + * On x86, this decreases overhead noticeably.
> + */
> +#ifndef __ARM_ARCH
> +#define CONFIG_QSCHST_LOCK
> +/* Keep all ring buffer/qschst data together when using locks */
> +#undef CONFIG_SPLIT_PRODCONS
> +#endif
> +
> +/*
> + * Maximum number of ordered locks per queue.
> + */
> +#define CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE 2


There's already a CONFIG_QUEUE_MAX_ORD_LOCKS define (value 4) in the general 
config file. The same setting should not be defined twice (and with a different 
value).



> +
> +#endif  /* ODP_SCHEDULE_SCALABLE_CONFIG_H_ */
> diff --git a/platform/linux-
> generic/include/odp_schedule_scalable_ordered.h b/platform/linux-
> generic/include/odp_schedule_scalable_ordered.h
> new file mode 100644
> index 00000000..9f3acf7a
> --- /dev/null
> +++ b/platform/linux-generic/include/odp_schedule_scalable_ordered.h
> @@ -0,0 +1,132 @@
> +/* Copyright (c) 2017, ARM Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:     BSD-3-Clause
> + */
> +
> +#ifndef ODP_SCHEDULE_SCALABLE_ORDERED_H
> +#define ODP_SCHEDULE_SCALABLE_ORDERED_H
> +
> +#include <odp/api/shared_memory.h>
> +
> +#include <odp_internal.h>
> +#include <odp_align_internal.h>
> +#include <odp_bitset.h>
> +#include <_ishmpool_internal.h>

> +
> +/* Number of reorder contects in the reorder window.
> + * Should be at least one per CPU.
> + */
> +#define RWIN_SIZE 32
> +ODP_STATIC_ASSERT(CHECK_IS_POWER2(RWIN_SIZE), "RWIN_SIZE is not a power
> of 2");
> +
> +#define NUM_OLOCKS 2

Is this the same as CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE, or something different 
with a similar name?


> diff --git a/platform/linux-generic/odp_queue_if.c b/platform/linux-
> generic/odp_queue_if.c
> index c91f00eb..d7471dfc 100644
> --- a/platform/linux-generic/odp_queue_if.c
> +++ b/platform/linux-generic/odp_queue_if.c
> @@ -6,11 +6,19 @@
> 
>  #include <odp_queue_if.h>
> 
> +extern const queue_api_t queue_scalable_api;
> +extern const queue_fn_t queue_scalable_fn;
> +
>  extern const queue_api_t queue_default_api;
>  extern const queue_fn_t queue_default_fn;
> 
> +#ifdef ODP_SCHEDULE_SCALABLE
> +const queue_api_t *queue_api = &queue_scalable_api;
> +const queue_fn_t *queue_fn = &queue_scalable_fn;
> +#else
>  const queue_api_t *queue_api = &queue_default_api;
>  const queue_fn_t *queue_fn = &queue_default_fn;
> +#endif
> 
>  odp_queue_t odp_queue_create(const char *name, const odp_queue_param_t
> *param)
>  {
> diff --git a/platform/linux-generic/odp_queue_scalable.c b/platform/linux-
> generic/odp_queue_scalable.c
> new file mode 100644
> index 00000000..d5c6d0ae
> --- /dev/null
> +++ b/platform/linux-generic/odp_queue_scalable.c
> @@ -0,0 +1,1020 @@
> +/* Copyright (c) 2017, ARM Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:     BSD-3-Clause
> + */
> +


> +static int queue_init(queue_entry_t *queue, const char *name,
> +                   const odp_queue_param_t *param)
> +{
> +     ringidx_t ring_idx;
> +     sched_elem_t *sched_elem;
> +     uint32_t ring_size;
> +     odp_buffer_hdr_t **ring;
> +     uint32_t size;
> +
> +     sched_elem = &queue->s.sched_elem;
> +     ring_size = param->size > 0 ?
> +             ROUNDUP_POWER2_U32(param->size) : CONFIG_QUEUE_SIZE;
> +     strncpy(queue->s.name, name ? name : "", ODP_QUEUE_NAME_LEN -
> 1);
> +     queue->s.name[ODP_QUEUE_NAME_LEN - 1] = 0;
> +     memcpy(&queue->s.param, param, sizeof(odp_queue_param_t));
> +
> +     size = ring_size * sizeof(odp_buffer_hdr_t *);
> +     ring = (odp_buffer_hdr_t
> **)shm_pool_alloc_align(queue_shm_pool, size);
> +     if (NULL == ring)
> +             return -1;
> +
> +     for (ring_idx = 0; ring_idx < ring_size; ring_idx++)
> +             ring[ring_idx] = NULL;
> +
> +     queue->s.type = queue->s.param.type;
> +     queue->s.enqueue = _queue_enq;
> +     queue->s.dequeue = _queue_deq;
> +     queue->s.enqueue_multi = _queue_enq_multi;
> +     queue->s.dequeue_multi = _queue_deq_multi;
> +     queue->s.pktin = PKTIN_INVALID;
> +
> +     sched_elem->node.next = NULL;
> +#ifdef CONFIG_QSCHST_LOCK
> +     LOCK_INIT(&sched_elem->qschlock);
> +#endif

There are about 30 of these CONFIG_QSCHST_LOCK ifdefs. It would be cleaner to 
embed ifdefs into the lock macros instead of surrounding each macro call with 
an ifdef.

Also, some of these CONFIG_QSCHST_LOCK ifdefs contain a large amount of code, 
including entire functions such as sched_update_deq, sched_update_deq_sc and 
_odp_queue_enq. Again, one version of a function is built for ARM and another 
version for other archs. Could both versions always be built, with the selection 
made by a minimal amount of ifdef'ed code?


-Petri

Re: [lng-odp] [API-NEXT PATCH v9 5/6] linux-gen: sched scalable: add scalable scheduler

Reply via email to