<snip>

> > > > > > > +++ b/lib/librte_ring/rte_ring_template.h
> > > > > > > @@ -0,0 +1,330 @@
> > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > + * Copyright (c) 2019 Arm Limited  */
> > > > > > > +
> > > > > > > +#ifndef _RTE_RING_TEMPLATE_H_ #define
> _RTE_RING_TEMPLATE_H_
> > > > > > > +
> > > > > > > +#ifdef __cplusplus
> > > > > > > +extern "C" {
> > > > > > > +#endif
> > > > > > > +
> > > > > > > +#include <stdio.h>
> > > > > > > +#include <stdint.h>
> > > > > > > +#include <sys/queue.h>
> > > > > > > +#include <errno.h>
> > > > > > > +#include <rte_common.h>
> > > > > > > +#include <rte_config.h>
> > > > > > > +#include <rte_memory.h>
> > > > > > > +#include <rte_lcore.h>
> > > > > > > +#include <rte_atomic.h>
> > > > > > > +#include <rte_branch_prediction.h> #include <rte_memzone.h>
> > > > > > > +#include <rte_pause.h> #include <rte_ring.h>
> > > > > > > +
> > > > > > > +/* Ring API suffix name - used to append to API names */
> > > > > > > +#ifndef RTE_RING_TMPLT_API_SUFFIX #error
> > > > > > > +RTE_RING_TMPLT_API_SUFFIX
> > > > not
> > > > > > > +defined #endif
> > > > > > > +
> > > > > > > +/* Ring's element size in bits, should be a power of 2 */
> > > > > > > +#ifndef RTE_RING_TMPLT_ELEM_SIZE #error
> > > > > > > +RTE_RING_TMPLT_ELEM_SIZE
> > > > not
> > > > > > defined
> > > > > > > +#endif
> > > > > > > +
> > > > > > > +/* Type of ring elements */ #ifndef
> > > > > > > +RTE_RING_TMPLT_ELEM_TYPE #error
> RTE_RING_TMPLT_ELEM_TYPE
> > > > > > > +not defined #endif
> > > > > > > +
> > > > > > > +#define _rte_fuse(a, b) a##_##b #define __rte_fuse(a, b)
> > > > > > > +_rte_fuse(a, b) #define
> > > > > > > +__RTE_RING_CONCAT(a) __rte_fuse(a,
> > > > > > > +RTE_RING_TMPLT_API_SUFFIX)
> > > > > > > +
> > > > > > > +/* Calculate the memory size needed for a ring */
> > > > > > > +RTE_RING_TMPLT_EXPERIMENTAL ssize_t
> > > > > > > +__RTE_RING_CONCAT(rte_ring_get_memsize)(unsigned count);
> > > > > > > +
> > > > > > > +/* Create a new ring named *name* in memory. */
> > > > > > > +RTE_RING_TMPLT_EXPERIMENTAL struct rte_ring *
> > > > > > > +__RTE_RING_CONCAT(rte_ring_create)(const char *name,
> > > > > > > +unsigned
> > > > count,
> > > > > > > +                                 int socket_id, unsigned flags);
> > > > > >
> > > > > >
> > > > > > Just an idea - probably same thing can be achieved in a different
> way.
> > > > > > Instead of all these defines - replace
> > > > > > ENQUEUE_PTRS/DEQUEUE_PTRS macros with static inline functions
> > > > > > and then make all internal functions,
> > > > i.e.
> > > > > > __rte_ring_do_dequeue()
> > > > > > to accept enqueue/dequeue function pointer as a parameter.
> > > > > > Then let say default rte_ring_mc_dequeue_bulk will do:
> > > > > >
> > > > > > rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
> > > > > >                 unsigned int n, unsigned int *available) {
> > > > > >         return __rte_ring_do_dequeue(r, obj_table, n,
> > > > RTE_RING_QUEUE_FIXED,
> > > > > >                         __IS_MC, available,
> > > > > > dequeue_ptr_default); }
> > > > > >
> > > > > > Then if someone will like to define ring functions
> > > > > > forelt_size==X, all he would need to do:
> > > > > > 1. define his own enqueue/dequeuer functions.
> > > > > > 2. do something like:
> > > > > > rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
> > > > > >                 unsigned int n, unsigned int *available) {
> > > > > >         return __rte_ring_do_dequeue(r, obj_table, n,
> > > > RTE_RING_QUEUE_FIXED,
> > > > > >                         __IS_MC, available, dequeue_X); }
> > > > > >
> > > > > > Konstantin
> > > > > Thanks for the feedback/idea. The goal of this patch was to make
> > > > > it simple enough to define APIs to store any element size
> > > > > without code
> > > > duplication.
> > > >
> > > > Well, then if we store elt_size inside the ring, it should be easy
> > > > enough to add to the API generic functions that would use
> > > > memcpy(or rte_memcpy) for enqueue/dequeue.
> > > > Yes, it might be slower than existing (8B per elem), but might be
> > > > still acceptable.
> > > The element size will be a constant in most use cases. If we keep
> > > the element size as a parameter, it allows the compiler to do any loop
> unrolling and auto-vectorization optimizations on copying.
> > > Storing the element size will result in additional memory access.
> >
> > I understand that, but for you case (rcu defer queue) you probably need
> highest possible performance, right?
> 
> Meant 'don't need' of course :)
😊 understood. that is just one use case. It actually started as an option to 
reduce memory usage in different places. You can look at the rte_hash changes 
in this patch. I also have plans for further changes.

> 
> > I am sure there will be other cases where such slight perf degradation is
> acceptatble.
> >
> > >
> > > >
> > > > >With this patch, the user has to write ~4 lines of code to get
> > > > >APIs for any element size. I would like to keep the goal still the  
> > > > >same.
> > > > >
> > > > > If we have to avoid the macro-fest, the main problem that needs
> > > > > to be addressed is - how to represent different sizes of element
> > > > > types in a generic
> > > > way? IMO, we can do this by defining the element type to be a
> > > > multiple of uint32_t (I do not think we need to go to uint16_t).
> > > > >
> > > > > For ex:
> > > > > rte_ring_mp_enqueue_bulk_objs(struct rte_ring *r,
> > > > >                 uint32_t *obj_table, unsigned int num_objs,
> > > > >                 unsigned int n,
> > > > >                 enum rte_ring_queue_behavior behavior, unsigned int 
> > > > > is_sp,
> > > > >                 unsigned int *free_space) { }
> > > > >
> > > > > This approach would ensure that we have generic enough APIs and
> > > > > they can be used for elements of any size. But the element
> > > > > itself needs to be a
> > > > multiple of 32b - I think this should not be a concern.
> > > > >
> > > > > The API suffix definitely needs to be better, any suggestions?
> > > >

Reply via email to