> >>>>> diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
> >>>>> index 9700494816..48df5f1a21 100644
> >>>>> --- a/lib/eal/include/meson.build
> >>>>> +++ b/lib/eal/include/meson.build
> >>>>> @@ -36,6 +36,7 @@ headers += files(
> >>>>>         'rte_per_lcore.h',
> >>>>>         'rte_random.h',
> >>>>>         'rte_reciprocal.h',
> >>>>> +        'rte_seqlock.h',
> >>>>>         'rte_service.h',
> >>>>>         'rte_service_component.h',
> >>>>>         'rte_string_fns.h',
> >>>>> diff --git a/lib/eal/include/rte_seqlock.h b/lib/eal/include/rte_seqlock.h
> >>>>> new file mode 100644
> >>>>> index 0000000000..b975ca848a
> >>>>> --- /dev/null
> >>>>> +++ b/lib/eal/include/rte_seqlock.h
> >>>>> @@ -0,0 +1,84 @@
> >>>>> +/* SPDX-License-Identifier: BSD-3-Clause
> >>>>> + * Copyright(c) 2022 Ericsson AB
> >>>>> + */
> >>>>> +
> >>>>> +#ifndef _RTE_SEQLOCK_H_
> >>>>> +#define _RTE_SEQLOCK_H_
> >>>>> +
> >>>>> +#include <stdbool.h>
> >>>>> +#include <stdint.h>
> >>>>> +
> >>>>> +#include <rte_atomic.h>
> >>>>> +#include <rte_branch_prediction.h>
> >>>>> +#include <rte_spinlock.h>
> >>>>> +
> >>>>> +struct rte_seqlock {
> >>>>> +        uint64_t sn;
> >>>>> +        rte_spinlock_t lock;
> >>>>> +};
> >>>>> +
> >>>>> +typedef struct rte_seqlock rte_seqlock_t;
> >>>>> +
> >>>>> +__rte_experimental
> >>>>> +void
> >>>>> +rte_seqlock_init(rte_seqlock_t *seqlock);
> >>>> Probably worth having a static initializer too.
> >>>>
> >>>
> >>> I will add that in the next version, thanks.
> >>>
> >>>>> +
> >>>>> +__rte_experimental
> >>>>> +static inline uint64_t
> >>>>> +rte_seqlock_read_begin(const rte_seqlock_t *seqlock)
> >>>>> +{
> >>>>> +        /* __ATOMIC_ACQUIRE to prevent loads after (in program order)
> >>>>> +         * from happening before the sn load. Synchronizes-with the
> >>>>> +         * store release in rte_seqlock_write_end().
> >>>>> +         */
> >>>>> +        return __atomic_load_n(&seqlock->sn, __ATOMIC_ACQUIRE);
> >>>>> +}
> >>>>> +
> >>>>> +__rte_experimental
> >>>>> +static inline bool
> >>>>> +rte_seqlock_read_retry(const rte_seqlock_t *seqlock, uint64_t begin_sn)
> >>>>> +{
> >>>>> +        uint64_t end_sn;
> >>>>> +
> >>>>> +        /* make sure the data loads happen before the sn load */
> >>>>> +        rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
> >>>> That's sort of 'read_end', correct?
> >>>> If so, shouldn't it be '__ATOMIC_RELEASE' instead here, and
> >>>> end_sn = __atomic_load_n(..., __ATOMIC_ACQUIRE)
> >>>> on the line below?
> >>>
> >>> A release fence prevents reordering of stores. The reader doesn't do
> >>> any stores, so I don't understand why you would use a release fence
> >>> here. Could you elaborate?
> >>
> >> From my understanding,
> >> rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
> >> serves as a hoist barrier here, so it would only prevent later
> >> instructions from being executed before that point.
> >> But it wouldn't prevent earlier instructions from being executed after
> >> that point, while we do need to guarantee that the CPU will finish all
> >> previous reads before progressing further.
> >>
> >> Suppose we have something like this:
> >>
> >> struct {
> >>         uint64_t shared;
> >>         rte_seqlock_t lock;
> >> } data;
> >>
> >> ...
> >> sn = ...
> >> uint64_t x = data.shared;
> >> /* inside rte_seqlock_read_retry(): */
> >> ...
> >> rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
> >> end_sn = __atomic_load_n(&data.lock.sn, __ATOMIC_RELAXED);
> >>
> >> Here we need to make sure that the read of data.shared will always
> >> happen before the read of data.lock.sn.
> >> It is not a problem on IA (as reads are not reordered), but on
> >> machines with relaxed memory ordering (ARM, etc.) it can happen.
> >> So to prevent it we do need a sink barrier here first (__ATOMIC_RELEASE).
> > We can't use store-release since there is no write on the reader side.
> > And a fence-release orders against later stores, not later loads.
> >
> >>
> >> Honnappa and other ARM & atomics experts, please correct me if I am
> >> wrong here.
> > The C standard (chapter 7.17.4 of the C11 draft) isn't so easy to
> > digest. If we trust Preshing, he has a more accessible description
> > here:
> > https://preshing.com/20130922/acquire-and-release-fences/
> > "An acquire fence prevents the memory reordering of any read which
> > precedes it in program order with any read or write which follows it
> > in program order."
> > and here:
> > https://preshing.com/20131125/acquire-and-release-fences-dont-work-the-way-youd-expect/
> > (for C++, but the definition seems to be identical to that of C11).
> > Essentially a LoadLoad+LoadStore barrier, which is what we want to
> > achieve.
> >
> > GCC 10.3 for the AArch64/A64 ISA generates a "DMB ISHLD" instruction.
> > This waits for all loads preceding (in program order) the memory
> > barrier to be observed before any memory accesses after (in program
> > order) the memory barrier.
> >
> > I think the key to understanding atomic thread fences is that they are
> > not associated with a specific memory access (unlike load-acquire and
> > store-release), so they can't order earlier or later memory accesses
> > against some specific memory access. Instead, the fence orders any/all
> > earlier loads and/or stores against any/all later loads or stores
> > (depending on acquire or release).
> >
> >>
> >>>>> +
> >>>>> +        end_sn = __atomic_load_n(&seqlock->sn, __ATOMIC_RELAXED);
> >>>>> +
> >>>>> +        return unlikely(begin_sn & 1 || begin_sn != end_sn);
> >>>>> +}
> >>>>> +
> >>>>> +__rte_experimental
> >>>>> +static inline void
> >>>>> +rte_seqlock_write_begin(rte_seqlock_t *seqlock)
> >>>>> +{
> >>>>> +        uint64_t sn;
> >>>>> +
> >>>>> +        /* to synchronize with other writers */
> >>>>> +        rte_spinlock_lock(&seqlock->lock);
> >>>>> +
> >>>>> +        sn = seqlock->sn + 1;
> >>>>> +
> >>>>> +        __atomic_store_n(&seqlock->sn, sn, __ATOMIC_RELAXED);
> >>>>> +
> >>>>> +        /* __ATOMIC_RELEASE to prevent stores after (in program order)
> >>>>> +         * from happening before the sn store.
> >>>>> +         */
> >>>>> +        rte_atomic_thread_fence(__ATOMIC_RELEASE);
> >>>> I think it needs to be '__ATOMIC_ACQUIRE' here instead of
> >>>> '__ATOMIC_RELEASE'.
> >>>
> >>> Please elaborate on why.
> >>
> >> As you said in the comments above, we need to prevent later stores
> >> from being executed before that point. So we do need a hoist barrier
> >> here. AFAIK, to guarantee a hoist barrier, '__ATOMIC_ACQUIRE' is
> >> required.
> > An acquire fence wouldn't prevent an earlier store (the write to
> > seqlock->sn) from being reordered with some later store (e.g. writes
> > to the protected data), thus it would allow readers to see updated
> > data (possibly torn) with a pre-update sequence number. We need a
> > StoreStore barrier for ordering the sn store and the data stores =>
> > fence(release).
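[Editor's note: to make the failure mode concrete, here is a minimal
writer-side sketch built on the API from the quoted patch. The struct
and field names are hypothetical, not from the patch.]

/* Hypothetical structure protected by the proposed seqlock;
 * 'addr' and 'len' are illustrative field names.
 */
struct config {
        rte_seqlock_t lock;
        uint64_t addr;
        uint64_t len;
};

static void
config_update(struct config *cfg, uint64_t addr, uint64_t len)
{
        rte_seqlock_write_begin(&cfg->lock);
        /* With only an acquire fence in rte_seqlock_write_begin(), the
         * stores below could be reordered before the relaxed sn store,
         * letting a reader observe torn data together with an even
         * (pre-update) sequence number. The release fence forbids
         * exactly that StoreStore reordering.
         */
        cfg->addr = addr;
        cfg->len = len;
        rte_seqlock_write_end(&cfg->lock);
}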
> >
> > Acquire and release fences can (also) be used to create
> > synchronizes-with relationships (this is how the C standard defines
> > them). Preshing has a good example of this. Basically:
> >
> > Thread 1:
> > data = 242;
> > __atomic_thread_fence(__ATOMIC_RELEASE);
> > __atomic_store_n(&guard, 1, __ATOMIC_RELAXED);
> >
> > Thread 2:
> > while (__atomic_load_n(&guard, __ATOMIC_RELAXED) != 1)
> >         ;
> > __atomic_thread_fence(__ATOMIC_ACQUIRE);
> > do_something(data);
> >
> > These are obvious analogues to store-release and load-acquire, hence
> > the acquire & release names of the fences.
> >
> > - Ola
> >
> >>
> >>>
> >>>>> +}
> >>>>> +
> >>>>> +__rte_experimental
> >>>>> +static inline void
> >>>>> +rte_seqlock_write_end(rte_seqlock_t *seqlock)
> >>>>> +{
> >>>>> +        uint64_t sn;
> >>>>> +
> >>>>> +        sn = seqlock->sn + 1;
> >>>>> +
> >>>>> +        /* synchronizes-with the load acquire in rte_seqlock_read_begin() */
> >>>>> +        __atomic_store_n(&seqlock->sn, sn, __ATOMIC_RELEASE);
> >>>>> +
> >>>>> +        rte_spinlock_unlock(&seqlock->lock);
> >>>>> +}
> >>>>> +
> >>
>
> I have nothing to add, but Ola's mail seems to have been blocked from
> the dev list, so I'm posting this again.
Ok, thanks Ola for the detailed explanation. I have to admit, then, that
my understanding of atomic_fence() behaviour was incorrect.
Please disregard my comments above about rte_seqlock_read_retry() and
rte_seqlock_write_begin().

Konstantin
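
[Editor's note: for reference, the matching reader-side sketch under
the same hypothetical 'struct config' as above. The acquire load in
rte_seqlock_read_begin() orders the data loads after the first sn
load, and the acquire fence in rte_seqlock_read_retry() orders them
before the second sn load, which is the ordering discussed in this
thread.]

static void
config_read(const struct config *cfg, uint64_t *addr, uint64_t *len)
{
        uint64_t sn;

        do {
                sn = rte_seqlock_read_begin(&cfg->lock);
                /* These plain loads may race with a writer; the
                 * retry check below discards any torn result.
                 */
                *addr = cfg->addr;
                *len = cfg->len;
        } while (rte_seqlock_read_retry(&cfg->lock, sn));
}

If a writer was active when the read started (odd begin_sn) or an
update completed in between (begin_sn != end_sn),
rte_seqlock_read_retry() returns true and the loads are redone.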