> --- a/platform/linux-generic/include/odp_config_internal.h > +++ b/platform/linux-generic/include/odp_config_internal.h > @@ -7,9 +7,7 @@ > #ifndef ODP_CONFIG_INTERNAL_H_ > #define ODP_CONFIG_INTERNAL_H_ > > -#ifdef __cplusplus > -extern "C" { > -#endif > +#include <odp_schedule_scalable_config.h>
Why these configs need global visibility? This file should contain general configuration options. > > /* > * Maximum number of pools > @@ -22,6 +20,13 @@ extern "C" { > #define ODP_CONFIG_QUEUES 1024 > > /* > + * Maximum queue depth. Maximum number of elements that can be stored in > a > + * queue. This value is used only when the size is not explicitly > provided > + * during queue creation. > + */ > +#define CONFIG_QUEUE_SIZE 4096 > + > +/* > * Maximum number of ordered locks per queue > */ > #define CONFIG_QUEUE_MAX_ORD_LOCKS 4 > @@ -120,7 +125,7 @@ extern "C" { > * > * This the the number of separate SHM areas that can be reserved > concurrently > */ > -#define ODPDRV_CONFIG_SHM_BLOCKS 48 > +#define ODPDRV_CONFIG_SHM_BLOCKS ODP_CONFIG_SHM_BLOCKS Is this change necessary? Increases driver memory usage for no reason? > + > +#endif /* ODP_SCHEDULE_SCALABLE_H */ > diff --git a/platform/linux-generic/include/odp_schedule_scalable_config.h > b/platform/linux-generic/include/odp_schedule_scalable_config.h > new file mode 100644 > index 00000000..febf379b > --- /dev/null > +++ b/platform/linux-generic/include/odp_schedule_scalable_config.h > @@ -0,0 +1,55 @@ > +/* Copyright (c) 2017, ARM Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_SCHEDULE_SCALABLE_CONFIG_H_ > +#define ODP_SCHEDULE_SCALABLE_CONFIG_H_ > + > +/* > + * Default scaling factor for the scheduler group > + * > + * This scaling factor is used when the application creates a scheduler > + * group with no worker threads. > + */ > +#define CONFIG_DEFAULT_XFACTOR 4 > + > +/* > + * Default weight (in events) for WRR in scalable scheduler > + * > + * This controls the per-queue weight for WRR between queues of the same > + * priority in the scalable scheduler > + * A higher value improves throughput while a lower value increases > fairness > + * and thus likely decreases latency > + * > + * If WRR is undesired, set the value to ~0 which will use the largest > possible > + * weight > + * > + * Note: an API for specifying this on a per-queue basis would be useful > but is > + * not yet available > + */ > +#define CONFIG_WRR_WEIGHT 64 > + > +/* > + * Split queue producer/consumer metadata into separate cache lines. > + * This is beneficial on e.g. Cortex-A57 but not so much on A53. > + */ > +#define CONFIG_SPLIT_PRODCONS > + > +/* > + * Use locks to protect queue (ring buffer) and scheduler state updates > + * On x86, this decreases overhead noticeably. > + */ > +#ifndef __ARM_ARCH > +#define CONFIG_QSCHST_LOCK > +/* Keep all ring buffer/qschst data together when using locks */ > +#undef CONFIG_SPLIT_PRODCONS > +#endif > + > +/* > + * Maximum number of ordered locks per queue. > + */ > +#define CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE 2 There's already CONFIG_QUEUE_MAX_ORD_LOCKS 4, in the general config file. Should not add the same define twice (with different value). > + > +#endif /* ODP_SCHEDULE_SCALABLE_CONFIG_H_ */ > diff --git a/platform/linux- > generic/include/odp_schedule_scalable_ordered.h b/platform/linux- > generic/include/odp_schedule_scalable_ordered.h > new file mode 100644 > index 00000000..9f3acf7a > --- /dev/null > +++ b/platform/linux-generic/include/odp_schedule_scalable_ordered.h > @@ -0,0 +1,132 @@ > +/* Copyright (c) 2017, ARM Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_SCHEDULE_SCALABLE_ORDERED_H > +#define ODP_SCHEDULE_SCALABLE_ORDERED_H > + > +#include <odp/api/shared_memory.h> > + > +#include <odp_internal.h> > +#include <odp_align_internal.h> > +#include <odp_bitset.h> > +#include <_ishmpool_internal.h> > + > +/* Number of reorder contects in the reorder window. > + * Should be at least one per CPU. > + */ > +#define RWIN_SIZE 32 > +ODP_STATIC_ASSERT(CHECK_IS_POWER2(RWIN_SIZE), "RWIN_SIZE is not a power > of 2"); > + > +#define NUM_OLOCKS 2 Is this the same as CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE or something different with similar name ? > diff --git a/platform/linux-generic/odp_queue_if.c b/platform/linux- > generic/odp_queue_if.c > index c91f00eb..d7471dfc 100644 > --- a/platform/linux-generic/odp_queue_if.c > +++ b/platform/linux-generic/odp_queue_if.c > @@ -6,11 +6,19 @@ > > #include <odp_queue_if.h> > > +extern const queue_api_t queue_scalable_api; > +extern const queue_fn_t queue_scalable_fn; > + > extern const queue_api_t queue_default_api; > extern const queue_fn_t queue_default_fn; > > +#ifdef ODP_SCHEDULE_SCALABLE > +const queue_api_t *queue_api = &queue_scalable_api; > +const queue_fn_t *queue_fn = &queue_scalable_fn; > +#else > const queue_api_t *queue_api = &queue_default_api; > const queue_fn_t *queue_fn = &queue_default_fn; > +#endif > > odp_queue_t odp_queue_create(const char *name, const odp_queue_param_t > *param) > { > diff --git a/platform/linux-generic/odp_queue_scalable.c b/platform/linux- > generic/odp_queue_scalable.c > new file mode 100644 > index 00000000..d5c6d0ae > --- /dev/null > +++ b/platform/linux-generic/odp_queue_scalable.c > @@ -0,0 +1,1020 @@ > +/* Copyright (c) 2017, ARM Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +static int queue_init(queue_entry_t *queue, const char *name, > + const odp_queue_param_t *param) > +{ > + ringidx_t ring_idx; > + sched_elem_t *sched_elem; > + uint32_t ring_size; > + odp_buffer_hdr_t **ring; > + uint32_t size; > + > + sched_elem = &queue->s.sched_elem; > + ring_size = param->size > 0 ? > + ROUNDUP_POWER2_U32(param->size) : CONFIG_QUEUE_SIZE; > + strncpy(queue->s.name, name ? name : "", ODP_QUEUE_NAME_LEN - > 1); > + queue->s.name[ODP_QUEUE_NAME_LEN - 1] = 0; > + memcpy(&queue->s.param, param, sizeof(odp_queue_param_t)); > + > + size = ring_size * sizeof(odp_buffer_hdr_t *); > + ring = (odp_buffer_hdr_t > **)shm_pool_alloc_align(queue_shm_pool, size); > + if (NULL == ring) > + return -1; > + > + for (ring_idx = 0; ring_idx < ring_size; ring_idx++) > + ring[ring_idx] = NULL; > + > + queue->s.type = queue->s.param.type; > + queue->s.enqueue = _queue_enq; > + queue->s.dequeue = _queue_deq; > + queue->s.enqueue_multi = _queue_enq_multi; > + queue->s.dequeue_multi = _queue_deq_multi; > + queue->s.pktin = PKTIN_INVALID; > + > + sched_elem->node.next = NULL; > +#ifdef CONFIG_QSCHST_LOCK > + LOCK_INIT(&sched_elem->qschlock); > +#endif There are about 30 of these CONFIG_QSCHST_LOCK ifdefs. It would be cleaner to embed ifdefs into the lock macros instead of surrounding each macro call with an ifdef. Also some of these CONFIG_QSCHST_LOCK ifdefs contain a large amount of code, entire functions like: sched_update_deq, sched_update_deq_sc, _odp_queue_enq. Again, one version of a function is built for ARM, another version for other archs. Could these be built always and make selection with a minimal amount of ifdef'ed code? -Petri