<snip> Few minor comments, overall looks good. Tested on few Arm platforms.
> > This is a new type of reader-writer lock that provides better fairness > guarantees which better suited for typical DPDK applications. > A pflock has two ticket pools, one for readers and one for writers. > > Phase fair reader writer locks ensure that neither reader nor writer will be > starved. Neither reader or writer are preferred, they execute in alternating > phases. All operations of the same type (reader or writer) that acquire the > lock are handled in FIFO order. Write operations are exclusive, and multiple > read operations can be run together (until a write arrives). > > A similar implementation is in Concurrency Kit package in FreeBSD. > For more information see: > "Reader-Writer Synchronization for Shared-Memory Multiprocessor > Real-Time Systems", > http://www.cs.unc.edu/~anderson/papers/ecrts09b.pdf > > Signed-off-by: Stephen Hemminger <step...@networkplumber.org> > --- > app/test/meson.build | 2 + > app/test/test_pflock.c | 193 +++++++++++++++++++ > lib/librte_eal/arm/include/meson.build | 1 + > lib/librte_eal/arm/include/rte_pflock.h | 18 ++ > lib/librte_eal/include/generic/rte_pflock.h | 202 ++++++++++++++++++++ > lib/librte_eal/ppc/include/meson.build | 1 + > lib/librte_eal/ppc/include/rte_pflock.h | 17 ++ > lib/librte_eal/x86/include/meson.build | 1 + > lib/librte_eal/x86/include/rte_pflock.h | 18 ++ > 9 files changed, 453 insertions(+) > create mode 100644 app/test/test_pflock.c create mode 100644 > lib/librte_eal/arm/include/rte_pflock.h > create mode 100644 lib/librte_eal/include/generic/rte_pflock.h > create mode 100644 lib/librte_eal/ppc/include/rte_pflock.h > create mode 100644 lib/librte_eal/x86/include/rte_pflock.h > > diff --git a/app/test/meson.build b/app/test/meson.build index > 76eaaea45746..bd50818f82b0 100644 > --- a/app/test/meson.build > +++ b/app/test/meson.build > @@ -90,6 +90,7 @@ test_sources = files('commands.c', > 'test_mcslock.c', > 'test_mp_secondary.c', > 'test_per_lcore.c', > + 'test_pflock.c', > 'test_pmd_perf.c', > 'test_power.c', > 'test_power_cpufreq.c', > @@ -228,6 +229,7 @@ fast_tests = [ > ['meter_autotest', true], > ['multiprocess_autotest', false], > ['per_lcore_autotest', true], > + ['pflock_autotest', true], > ['prefetch_autotest', true], > ['rcu_qsbr_autotest', true], > ['red_autotest', true], > diff --git a/app/test/test_pflock.c b/app/test/test_pflock.c new file mode > 100644 index 000000000000..5e3c05767767 > --- /dev/null > +++ b/app/test/test_pflock.c > @@ -0,0 +1,193 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2021 Microsoft Corporation */ > + > +#include <stdio.h> > +#include <stdint.h> > +#include <inttypes.h> > +#include <unistd.h> > +#include <sys/queue.h> > +#include <string.h> > + > +#include <rte_common.h> > +#include <rte_memory.h> > +#include <rte_per_lcore.h> > +#include <rte_launch.h> > +#include <rte_pause.h> > +#include <rte_pflock.h> > +#include <rte_eal.h> > +#include <rte_lcore.h> > +#include <rte_cycles.h> > + > +#include "test.h" > + > +/* > + * phase fair lock test > + * =========== > + * Provides UT for phase fair lock API. > + * Main concern is on functional testing, but also provides some > + * performance measurements. > + * Obviously for proper testing need to be executed with more than one > lcore. > + */ > + > +#define ITER_NUM 0x80 > + > +#define TEST_SEC 5 The above 2 #defines are not used, you can remove them > + > +static rte_pflock_t sl; > +static rte_pflock_t sl_tab[RTE_MAX_LCORE]; static uint32_t synchro; > + > +enum { > + LC_TYPE_RDLOCK, > + LC_TYPE_WRLOCK, > +}; This enum is not used, you can remove it > + > +static int > +test_pflock_per_core(__rte_unused void *arg) { > + rte_pflock_write_lock(&sl); > + printf("Global write lock taken on core %u\n", rte_lcore_id()); > + rte_pflock_write_unlock(&sl); > + > + rte_pflock_write_lock(&sl_tab[rte_lcore_id()]); > + printf("Hello from core %u !\n", rte_lcore_id()); > + rte_pflock_write_unlock(&sl_tab[rte_lcore_id()]); > + > + rte_pflock_read_lock(&sl); > + printf("Global read lock taken on core %u\n", rte_lcore_id()); > + rte_delay_ms(100); > + printf("Release global read lock on core %u\n", rte_lcore_id()); > + rte_pflock_read_unlock(&sl); > + > + return 0; > +} > + > +static rte_pflock_t lk = RTE_PFLOCK_INITIALIZER; static volatile > +uint64_t pflock_data; static uint64_t time_count[RTE_MAX_LCORE] = {0}; > + > +#define MAX_LOOP 10000 > +#define TEST_PFLOCK_DEBUG 0 > + > +static int > +load_loop_fn(__rte_unused void *arg) > +{ > + uint64_t time_diff = 0, begin; > + uint64_t hz = rte_get_timer_hz(); > + uint64_t lcount = 0; > + const unsigned int lcore = rte_lcore_id(); > + > + /* wait synchro for workers */ > + if (lcore != rte_get_main_lcore()) > + rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED); > + > + begin = rte_rdtsc_precise(); > + while (lcount < MAX_LOOP) { > + rte_pflock_write_lock(&lk); > + ++pflock_data; This should be an atomic increment, better to use atomic fetch add > + rte_pflock_write_unlock(&lk); > + > + rte_pflock_read_lock(&lk); > + if (TEST_PFLOCK_DEBUG && !(lcount % 100)) > + printf("Core [%u] pflock_data = %"PRIu64"\n", > + lcore, pflock_data); > + rte_pflock_read_unlock(&lk); > + > + lcount++; > + /* delay to make lock duty cycle slightly realistic */ > + rte_pause(); > + } > + > + time_diff = rte_rdtsc_precise() - begin; > + time_count[lcore] = time_diff * 1000000 / hz; > + return 0; > +} > + > +static int > +test_pflock_perf(void) > +{ > + unsigned int i; > + uint64_t total = 0; > + > + printf("\nPhase fair test on %u cores...\n", rte_lcore_count()); > + > + /* clear synchro and start workers */ > + synchro = 0; > + if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MAIN) < > 0) > + return -1; > + > + /* start synchro and launch test on main */ > + __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED); > + load_loop_fn(NULL); > + > + rte_eal_mp_wait_lcore(); > + > + RTE_LCORE_FOREACH(i) { > + printf("Core [%u] cost time = %"PRIu64" us\n", > + i, time_count[i]); > + total += time_count[i]; > + } > + > + printf("Total cost time = %"PRIu64" us\n", total); > + memset(time_count, 0, sizeof(time_count)); > + > + return 0; > +} > + > +/* > + * - There is a global pflock and a table of pflocks (one per lcore). > + * > + * - The test function takes all of these locks and launches the > + * ``test_pflock_per_core()`` function on each core (except the main). > + * > + * - The function takes the global write lock, display something, > + * then releases the global lock. > + * - Then, it takes the per-lcore write lock, display something, and > + * releases the per-core lock. > + * - Finally, a read lock is taken during 100 ms, then released. > + * > + * - The main function unlocks the per-lcore locks sequentially and > + * waits between each lock. This triggers the display of a message > + * for each core, in the correct order. > + * > + * Then, it tries to take the global write lock and display the last > + * message. The autotest script checks that the message order is correct. > + */ > +static int > +test_pflock(void) > +{ > + int i; > + > + rte_pflock_init(&sl); > + for (i = 0; i < RTE_MAX_LCORE; i++) > + rte_pflock_init(&sl_tab[i]); > + > + rte_pflock_write_lock(&sl); > + > + RTE_LCORE_FOREACH_WORKER(i) { > + rte_pflock_write_lock(&sl_tab[i]); > + rte_eal_remote_launch(test_pflock_per_core, NULL, i); > + } > + > + rte_pflock_write_unlock(&sl); > + > + RTE_LCORE_FOREACH_WORKER(i) { > + rte_pflock_write_unlock(&sl_tab[i]); > + rte_delay_ms(100); > + } > + > + rte_pflock_write_lock(&sl); > + /* this message should be the last message of test */ > + printf("Global write lock taken on main core %u\n", rte_lcore_id()); > + rte_pflock_write_unlock(&sl); > + > + rte_eal_mp_wait_lcore(); > + > + if (test_pflock_perf() < 0) Suggest separating out the performance test so that it is not run on the cloud CI platforms (which have issues with performance tests timing out). I think autotest_data.py needs to be modified. > + return -1; > + > + return 0; > +} > + > +REGISTER_TEST_COMMAND(pflock_autotest, test_pflock); > diff --git a/lib/librte_eal/arm/include/meson.build > b/lib/librte_eal/arm/include/meson.build > index 770766de1a34..2c3cff61bed6 100644 > --- a/lib/librte_eal/arm/include/meson.build > +++ b/lib/librte_eal/arm/include/meson.build > @@ -21,6 +21,7 @@ arch_headers = files( > 'rte_pause_32.h', > 'rte_pause_64.h', > 'rte_pause.h', > + 'rte_pflock.h', > 'rte_power_intrinsics.h', > 'rte_prefetch_32.h', > 'rte_prefetch_64.h', > diff --git a/lib/librte_eal/arm/include/rte_pflock.h > b/lib/librte_eal/arm/include/rte_pflock.h > new file mode 100644 > index 000000000000..bb9934eec469 > --- /dev/null > +++ b/lib/librte_eal/arm/include/rte_pflock.h > @@ -0,0 +1,18 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2021 Microsoft Corporation */ > + > +#ifndef _RTE_PFLOCK_ARM_H_ > +#define _RTE_PFLOCK_ARM_H_ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include "generic/rte_pflock.h" > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _RTE_PFLOCK_ARM_H_ */ > diff --git a/lib/librte_eal/include/generic/rte_pflock.h > b/lib/librte_eal/include/generic/rte_pflock.h > new file mode 100644 > index 000000000000..7c183633df60 > --- /dev/null > +++ b/lib/librte_eal/include/generic/rte_pflock.h > @@ -0,0 +1,202 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2021 Microsoft Corp. > + * All rights reserved. > + * > + * Derived from Concurrency Kit > + * Copyright 2011-2015 Samy Al Bahra. > + */ > + > +#ifndef _RTE_PFLOCK_H_ > +#define _RTE_PFLOCK_H_ > + > +/** > + * @file > + * > + * Phase-fair locks > + * > + * This file defines an API for Phase Fair reader writer locks, > + * which is a variant of typical reader-writer locks that prevent > + * starvation. In this type of lock, readers and writers alternate. > + * This significantly reduces the worst-case blocking for readers and > writers. > + * > + * This is an implementation derived from FreeBSD > + * based on the work described in: > + * Brandenburg, B. and Anderson, J. 2010. Spin-Based > + * Reader-Writer Synchronization for Multiprocessor Real-Time Systems > + * > + * All locks must be initialised before use, and only initialised once. > + */ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include <rte_common.h> > +#include <rte_pause.h> > + > +/** > + * The rte_pflock_t type. > + */ > +struct rte_pflock { > + struct { > + uint16_t in; > + uint16_t out; > + } rd, wr; > +}; > +typedef struct rte_pflock rte_pflock_t; > + > +/* > + * Allocation of bits to reader > + * > + * 16 8 7 2 1 0 Typo, this numbering should be 15 4 3 2 1 0 > + * +-------------------+------+-+-+ > + * | rin: reads issued |unused| | | > + * +-------------------+------+-+-+ > + * ^ ^ > + * | | > + * PRES: writer present ---+ | > + * PHID: writer phase id ----+ > + * > + * 16 2 7 0 Here, it should be 15 4 3 0 > + * +------------------+------+ > + * |rout:read complete|unused| > + * +------------------+------+ > + * > + * The maximum number of readers is 4095 */ > + > +/* Constants used to map the bits in reader counter */ > +#define RTE_PFLOCK_WBITS 0x3 /* Writer bits in reader. */ > +#define RTE_PFLOCK_PRES 0x2 /* Writer present bit. */ > +#define RTE_PFLOCK_PHID 0x1 /* Phase ID bit. */ > +#define RTE_PFLOCK_LSB 0xFFF0 /* reader bits. */ > +#define RTE_PFLOCK_RINC 0x10 /* Reader increment. */ > + > +/** > + * A static pflock initializer. > + */ > +#define RTE_PFLOCK_INITIALIZER { } > + > +/** > + * @warning > + * @b EXPERIMENTAL: this API may change without prior notice. > + * > + * Initialize the pflock to an unlocked state. > + * > + * @param pf > + * A pointer to the pflock. > + */ > +__rte_experimental > +static inline void Minor, this API does not need to be inline. > +rte_pflock_init(struct rte_pflock *pf) > +{ > + memset(pf, 0, sizeof(*pf)); > +} > + > +/** > + * @warning > + * @b EXPERIMENTAL: this API may change without prior notice. > + * > + * Take a pflock for read. > + * > + * @param pf > + * A pointer to a pflock structure. > + */ > +__rte_experimental > +static inline void > +rte_pflock_read_lock(rte_pflock_t *pf) > +{ > + uint16_t w; > + > + /* > + * If no writer is present, then the operation has completed > + * successfully. > + */ > + w = __atomic_fetch_add(&pf->rd.in, RTE_PFLOCK_RINC, > __ATOMIC_ACQUIRE) > + & RTE_PFLOCK_WBITS; > + if (w == 0) > + return; > + > + /* Wait for current write phase to complete. */ > + while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE) & > RTE_PFLOCK_WBITS) == w) > + rte_pause(); > +} > + > +/** > + * @warning > + * @b EXPERIMENTAL: this API may change without prior notice. > + * > + * Release a pflock locked for reading. > + * > + * @param pf > + * A pointer to the pflock structure. > + */ > +__rte_experimental > +static inline void > +rte_pflock_read_unlock(rte_pflock_t *pf) { > + __atomic_fetch_add(&pf->rd.out, RTE_PFLOCK_RINC, > __ATOMIC_RELEASE); } > + > +/** > + * @warning > + * @b EXPERIMENTAL: this API may change without prior notice. > + * > + * Take the pflock for write. > + * > + * @param pf > + * A pointer to the ticketlock. Typo ^^^^^^^^ pflock > + */ > +__rte_experimental > +static inline void > +rte_pflock_write_lock(rte_pflock_t *pf) { > + uint16_t ticket, w; > + > + /* Acquire ownership of write-phase. > + * This is same as rte_tickelock_lock(). > + */ > + ticket = __atomic_fetch_add(&pf->wr.in, 1, __ATOMIC_RELAXED); > + rte_wait_until_equal_16(&pf->wr.out, ticket, __ATOMIC_ACQUIRE); > + > + /* > + * Acquire ticket on read-side in order to allow them > + * to flush. Indicates to any incoming reader that a > + * write-phase is pending. > + * > + * The load of rd.out in wait loop could be executed > + * speculatively. > + */ > + w = RTE_PFLOCK_PRES | (ticket & RTE_PFLOCK_PHID); > + ticket = __atomic_fetch_add(&pf->rd.in, w, __ATOMIC_RELAXED); > + > + /* Wait for any pending readers to flush. */ > + rte_wait_until_equal_16(&pf->rd.out, ticket, __ATOMIC_ACQUIRE); > } > + > +/** > + * @warning > + * @b EXPERIMENTAL: this API may change without prior notice. > + * > + * Release a pflock held for writing. > + * > + * @param pf > + * A pointer to a pflock structure. > + */ > +__rte_experimental > +static inline void > +rte_pflock_write_unlock(rte_pflock_t *pf) { > + /* Migrate from write phase to read phase. */ > + __atomic_fetch_and(&pf->rd.in, RTE_PFLOCK_LSB, > __ATOMIC_RELEASE); > + > + /* Allow other writers to continue. */ > + __atomic_fetch_add(&pf->wr.out, 1, __ATOMIC_RELEASE); } > + > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* RTE_PFLOCK_H */ > diff --git a/lib/librte_eal/ppc/include/meson.build > b/lib/librte_eal/ppc/include/meson.build > index dae40ede546e..7692a531ccba 100644 > --- a/lib/librte_eal/ppc/include/meson.build > +++ b/lib/librte_eal/ppc/include/meson.build > @@ -11,6 +11,7 @@ arch_headers = files( > 'rte_mcslock.h', > 'rte_memcpy.h', > 'rte_pause.h', > + 'rte_pflock.h', > 'rte_power_intrinsics.h', > 'rte_prefetch.h', > 'rte_rwlock.h', > diff --git a/lib/librte_eal/ppc/include/rte_pflock.h > b/lib/librte_eal/ppc/include/rte_pflock.h > new file mode 100644 > index 000000000000..27c201b11d05 > --- /dev/null > +++ b/lib/librte_eal/ppc/include/rte_pflock.h > @@ -0,0 +1,17 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2021 Microsoft Corporation */ #ifndef > +_RTE_PFLOCK_PPC_64_H_ #define _RTE_PFLOCK_PPC_64_H_ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include "generic/rte_pflock.h" > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _RTE_PFLOCK_PPC_64_H_ */ > diff --git a/lib/librte_eal/x86/include/meson.build > b/lib/librte_eal/x86/include/meson.build > index 1a6ad0b17342..f43645c20899 100644 > --- a/lib/librte_eal/x86/include/meson.build > +++ b/lib/librte_eal/x86/include/meson.build > @@ -10,6 +10,7 @@ arch_headers = files( > 'rte_mcslock.h', > 'rte_memcpy.h', > 'rte_pause.h', > + 'rte_pflock.h', > 'rte_power_intrinsics.h', > 'rte_prefetch.h', > 'rte_rtm.h', > diff --git a/lib/librte_eal/x86/include/rte_pflock.h > b/lib/librte_eal/x86/include/rte_pflock.h > new file mode 100644 > index 000000000000..c2d876062c08 > --- /dev/null > +++ b/lib/librte_eal/x86/include/rte_pflock.h > @@ -0,0 +1,18 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2021 Microsoft Corporation */ > + > +#ifndef _RTE_PFLOCK_X86_64_H_ > +#define _RTE_PFLOCK_X86_64_H_ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include "generic/rte_pflock.h" > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _RTE_PFLOCK_X86_64_H_ */ > -- > 2.30.2