On Fri, Oct 30, 2015 at 01:25:28AM +0100, Jan Viktorin wrote:
> From: Vlastimil Kosar <kosar at rehivetech.com>
>
> This patch adds an architecture-specific atomic operation file
> for the ARM architecture. It utilizes compiler intrinsics only.
>
> Signed-off-by: Vlastimil Kosar <kosar at rehivetech.com>
> Signed-off-by: Jan Viktorin <viktorin at rehivetech.com>
> ---
> v1 -> v2:
> * improve rte_wmb()
> * use __atomic_* or __sync_*? (may affect the required GCC version)
>
> v4:
> * checkpatch complains about the volatile keyword (but it seems OK to me)
> * checkpatch complains about do { ... } while (0) for a single statement
>   with asm volatile (but I didn't find a way to write it without
>   the checkpatch complaints)
> * checkpatch is now happy with whitespaces
> ---
>  .../common/include/arch/arm/rte_atomic.h | 256 +++++++++++++++++++++
>  1 file changed, 256 insertions(+)
>  create mode 100644 lib/librte_eal/common/include/arch/arm/rte_atomic.h
>
> diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
> new file mode 100644
> index 0000000..ea1e485
> --- /dev/null
> +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
> @@ -0,0 +1,256 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2015 RehiveTech. All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of RehiveTech nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _RTE_ATOMIC_ARM_H_
> +#define _RTE_ATOMIC_ARM_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include "generic/rte_atomic.h"
> +
> +/**
> + * General memory barrier.
> + *
> + * Guarantees that the LOAD and STORE operations generated before the
> + * barrier occur before the LOAD and STORE operations generated after.
> + */
> +#define rte_mb()  __sync_synchronize()
> +
> +/**
> + * Write memory barrier.
> + *
> + * Guarantees that the STORE operations generated before the barrier
> + * occur before the STORE operations generated after.
> + */
> +#define rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0)
> +
> +/**
> + * Read memory barrier.
> + *
> + * Guarantees that the LOAD operations generated before the barrier
> + * occur before the LOAD operations generated after.
> + */
> +#define rte_rmb() __sync_synchronize()
> +

#define dmb(opt) asm volatile("dmb " #opt : : : "memory")

static inline void rte_mb(void)
{
	dmb(ish);
}

static inline void rte_wmb(void)
{
	dmb(ishst);
}

static inline void rte_rmb(void)
{
	dmb(ishld);
}

For armv8, it makes sense to have the above definitions for rte_*mb().
If that doesn't make sense for armv7, then we need to split this file
into rte_atomic_32.h/rte_atomic_64.h.
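To show the intent, here is a self-contained sketch of how such
barriers would be used (untested; the my_* names and the data/flag
variables are made up for illustration, and note that "dmb ishld" is
ARMv8-only, so armv7 would have to fall back to "dmb ish" there):

#define dmb(opt) asm volatile("dmb " #opt : : : "memory")

static volatile int data;
static volatile int flag;

/* producer: make the data store visible before the flag store */
static void my_publish(int v)
{
	data = v;
	dmb(ishst);	/* store-store barrier, inner-shareable domain */
	flag = 1;
}

/* consumer: order the flag load before the data load */
static int my_consume(void)
{
	while (!flag)
		;
	dmb(ishld);	/* load-load/load-store barrier (ARMv8 only) */
	return data;
}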
> +/*------------------------- 16 bit atomic operations -------------------------*/
> +
> +#ifndef RTE_FORCE_INTRINSICS
> +static inline int
> +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
> +{
> +	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
> +		__ATOMIC_ACQUIRE) ? 1 : 0;
> +}

IMO, it should be __ATOMIC_SEQ_CST instead of __ATOMIC_ACQUIRE.
__ATOMIC_ACQUIRE works in conjunction with __ATOMIC_RELEASE. AFAIK, the
DPDK atomic API expects a full barrier, and the C11 memory model is not
used yet. So why can't we use the RTE_FORCE_INTRINSICS-based generic
implementation? The same holds true for the spinlock implementation
(i.e. using RTE_FORCE_INTRINSICS). Am I missing something here?

> +
> +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
> +{
> +	return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
> +}
> +
> +static inline void
> +rte_atomic16_inc(rte_atomic16_t *v)
> +{
> +	__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
> +}
> +
> +static inline void
> +rte_atomic16_dec(rte_atomic16_t *v)
> +{
> +	__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
> +}
> +
> +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
> +{
> +	return (__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0);
> +}
> +
> +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
> +{
> +	return (__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0);
> +}
> +
> +/*------------------------- 32 bit atomic operations -------------------------*/
> +
> +static inline int
> +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
> +{
> +	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
> +		__ATOMIC_ACQUIRE) ? 1 : 0;
> +}
> +
> +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
> +{
> +	return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
> +}
> +
> +static inline void
> +rte_atomic32_inc(rte_atomic32_t *v)
> +{
> +	__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
> +}
> +
> +static inline void
> +rte_atomic32_dec(rte_atomic32_t *v)
> +{
> +	__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
> +}
> +
> +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
> +{
> +	return (__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0);
> +}
> +
> +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
> +{
> +	return (__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0);
> +}
> +
> +/*------------------------- 64 bit atomic operations -------------------------*/
> +
> +static inline int
> +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
> +{
> +	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
> +		__ATOMIC_ACQUIRE) ? 1 : 0;
> +}
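To make the full-barrier point above concrete: the only change needed
in these cmpset helpers would be the memory-order arguments. An
untested sketch (my_atomic64_cmpset is a hypothetical name, not the
patch's API):

#include <stdint.h>

/* sketch: full-barrier compare-and-set; both the success and the
 * failure memory order are __ATOMIC_SEQ_CST, which matches the
 * full-barrier semantics the DPDK atomic API documents */
static inline int
my_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
{
	return __atomic_compare_exchange_n(dst, &exp, src,
		0 /* strong */, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ? 1 : 0;
}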
> +
> +static inline void
> +rte_atomic64_init(rte_atomic64_t *v)
> +{
> +	int success = 0;
> +	uint64_t tmp;
> +
> +	while (success == 0) {
> +		tmp = v->cnt;
> +		success = rte_atomic64_cmpset(
> +			(volatile uint64_t *)&v->cnt, tmp, 0);
> +	}
> +}
> +
> +static inline int64_t
> +rte_atomic64_read(rte_atomic64_t *v)
> +{
> +	int success = 0;
> +	uint64_t tmp;
> +
> +	while (success == 0) {
> +		tmp = v->cnt;
> +		/* replace the value by itself */
> +		success = rte_atomic64_cmpset(
> +			(volatile uint64_t *)&v->cnt, tmp, tmp);
> +	}
> +	return tmp;
> +}

This will be overkill for arm64. The generic implementation has an
__LP64__-based check for 64-bit platforms.

> +
> +static inline void
> +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
> +{
> +	int success = 0;
> +	uint64_t tmp;
> +
> +	while (success == 0) {
> +		tmp = v->cnt;
> +		success = rte_atomic64_cmpset(
> +			(volatile uint64_t *)&v->cnt, tmp, new_value);
> +	}
> +}
> +
> +static inline void
> +rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
> +{
> +	__atomic_fetch_add(&v->cnt, inc, __ATOMIC_ACQUIRE);
> +}
> +
> +static inline void
> +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
> +{
> +	__atomic_fetch_sub(&v->cnt, dec, __ATOMIC_ACQUIRE);
> +}
> +

__atomic_fetch_* operations on 64-bit types work only when the compiler
supports them natively (__GCC_ATOMIC_LLONG_LOCK_FREE >= 2). If the DPDK
API expects a full barrier rather than the C11-memory-model-based
__ATOMIC_ACQUIRE, then it is better to use the generic implementation.

> +static inline void
> +rte_atomic64_inc(rte_atomic64_t *v)
> +{
> +	__atomic_fetch_add(&v->cnt, 1, __ATOMIC_ACQUIRE);
> +}
> +
> +static inline void
> +rte_atomic64_dec(rte_atomic64_t *v)
> +{
> +	__atomic_fetch_sub(&v->cnt, 1, __ATOMIC_ACQUIRE);
> +}
> +
> +static inline int64_t
> +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
> +{
> +	return __atomic_add_fetch(&v->cnt, inc, __ATOMIC_ACQUIRE);
> +}
> +
> +static inline int64_t
> +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
> +{
> +	return __atomic_sub_fetch(&v->cnt, dec, __ATOMIC_ACQUIRE);
> +}
> +
> +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
> +{
> +	return (__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0);
> +}
> +
> +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
> +{
> +	return (__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0);
> +}
> +
> +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
> +{
> +	return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
> +}
> +
> +/**
> + * Atomically set a 64-bit counter to 0.
> + *
> + * @param v
> + *   A pointer to the atomic counter.
> + */
> +static inline void rte_atomic64_clear(rte_atomic64_t *v)
> +{
> +	rte_atomic64_set(v, 0);
> +}
> +#endif
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_ATOMIC_ARM_H_ */
> --
> 2.6.1
>
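One more note on the __GCC_ATOMIC_LLONG_LOCK_FREE point: if the
__atomic_* path is kept, the requirement could be made explicit at
compile time. A minimal sketch (untested):

/* sketch: fail the build early if 64-bit __atomic_* operations would
 * be expanded to library calls instead of lock-free instructions */
#if defined(__GCC_ATOMIC_LLONG_LOCK_FREE) && __GCC_ATOMIC_LLONG_LOCK_FREE < 2
#error "lock-free 64-bit atomics are not available on this target"
#endif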