> > > > > +++ b/lib/librte_ring/rte_ring_template.h
> > > > > @@ -0,0 +1,330 @@
> > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > + * Copyright (c) 2019 Arm Limited */
> > > > > +
> > > > > +#ifndef _RTE_RING_TEMPLATE_H_
> > > > > +#define _RTE_RING_TEMPLATE_H_
> > > > > +
> > > > > +#ifdef __cplusplus
> > > > > +extern "C" {
> > > > > +#endif
> > > > > +
> > > > > +#include <stdio.h>
> > > > > +#include <stdint.h>
> > > > > +#include <sys/queue.h>
> > > > > +#include <errno.h>
> > > > > +#include <rte_common.h>
> > > > > +#include <rte_config.h>
> > > > > +#include <rte_memory.h>
> > > > > +#include <rte_lcore.h>
> > > > > +#include <rte_atomic.h>
> > > > > +#include <rte_branch_prediction.h> #include <rte_memzone.h>
> > > > > +#include <rte_pause.h> #include <rte_ring.h>
> > > > > +
> > > > > +/* Ring API suffix name - used to append to API names */ #ifndef
> > > > > +RTE_RING_TMPLT_API_SUFFIX #error RTE_RING_TMPLT_API_SUFFIX
> > not
> > > > > +defined #endif
> > > > > +
> > > > > +/* Ring's element size in bits, should be a power of 2 */ #ifndef
> > > > > +RTE_RING_TMPLT_ELEM_SIZE #error RTE_RING_TMPLT_ELEM_SIZE
> > not
> > > > defined
> > > > > +#endif
> > > > > +
> > > > > +/* Type of ring elements */
> > > > > +#ifndef RTE_RING_TMPLT_ELEM_TYPE
> > > > > +#error RTE_RING_TMPLT_ELEM_TYPE not defined #endif
> > > > > +
> > > > > +#define _rte_fuse(a, b) a##_##b
> > > > > +#define __rte_fuse(a, b) _rte_fuse(a, b) #define
> > > > > +__RTE_RING_CONCAT(a) __rte_fuse(a, RTE_RING_TMPLT_API_SUFFIX)
> > > > > +
> > > > > +/* Calculate the memory size needed for a ring */
> > > > > +RTE_RING_TMPLT_EXPERIMENTAL ssize_t
> > > > > +__RTE_RING_CONCAT(rte_ring_get_memsize)(unsigned count);
> > > > > +
> > > > > +/* Create a new ring named *name* in memory. */
> > > > > +RTE_RING_TMPLT_EXPERIMENTAL struct rte_ring *
> > > > > +__RTE_RING_CONCAT(rte_ring_create)(const char *name, unsigned
> > count,
> > > > > + int socket_id, unsigned flags);
> > > >
> > > >
> > > > Just an idea - probably same thing can be achieved in a different way.
> > > > Instead of all these defines - replace ENQUEUE_PTRS/DEQUEUE_PTRS
> > > > macros with static inline functions and then make all internal
> > > > functions,
> > i.e.
> > > > __rte_ring_do_dequeue()
> > > > to accept enqueue/dequeue function pointer as a parameter.
> > > > Then let say default rte_ring_mc_dequeue_bulk will do:
> > > >
> > > > rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
> > > > unsigned int n, unsigned int *available) {
> > > > return __rte_ring_do_dequeue(r, obj_table, n,
> > RTE_RING_QUEUE_FIXED,
> > > > __IS_MC, available, dequeue_ptr_default); }
> > > >
> > > > Then if someone will like to define ring functions forelt_size==X,
> > > > all he would need to do:
> > > > 1. define his own enqueue/dequeuer functions.
> > > > 2. do something like:
> > > > rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
> > > > unsigned int n, unsigned int *available) {
> > > > return __rte_ring_do_dequeue(r, obj_table, n,
> > RTE_RING_QUEUE_FIXED,
> > > > __IS_MC, available, dequeue_X); }
> > > >
> > > > Konstantin
> > > Thanks for the feedback/idea. The goal of this patch was to make it
> > > simple enough to define APIs to store any element size without code
> > duplication.
> >
> > Well, then if we store elt_size inside the ring, it should be easy enough
> > to add
> > to the API generic functions that would use memcpy(or rte_memcpy) for
> > enqueue/dequeue.
> > Yes, it might be slower than existing (8B per elem), but might be still
> > acceptable.
> The element size will be a constant in most use cases. If we keep the element
> size as a parameter, it allows the compiler to do any loop
> unrolling and auto-vectorization optimizations on copying.
> Storing the element size will result in additional memory access.
I understand that, but for you case (rcu defer queue) you probably need highest
possible performance, right?
I am sure there will be other cases where such slight perf degradation is
acceptatble.
>
> >
> > >With this patch, the user has to write ~4 lines of code to get APIs for
> > >any element size. I would like to keep the goal still the same.
> > >
> > > If we have to avoid the macro-fest, the main problem that needs to be
> > > addressed is - how to represent different sizes of element types in a
> > > generic
> > way? IMO, we can do this by defining the element type to be a multiple of
> > uint32_t (I do not think we need to go to uint16_t).
> > >
> > > For ex:
> > > rte_ring_mp_enqueue_bulk_objs(struct rte_ring *r,
> > > uint32_t *obj_table, unsigned int num_objs,
> > > unsigned int n,
> > > enum rte_ring_queue_behavior behavior, unsigned int is_sp,
> > > unsigned int *free_space) { }
> > >
> > > This approach would ensure that we have generic enough APIs and they
> > > can be used for elements of any size. But the element itself needs to be a
> > multiple of 32b - I think this should not be a concern.
> > >
> > > The API suffix definitely needs to be better, any suggestions?
> >