> > Now that we have everything in a C file, we can store the information > about our sleep, and have a native mechanism to wake up the sleeping > core. This mechanism would however only wake up a core that's sleeping > while monitoring - waking up from `rte_power_pause` won't work. > > Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com> > --- > > Notes: > v13: > - Add comments around wakeup code to explain what it does > - Add lcore_id parameter checking to prevent buffer overrun > > .../arm/include/rte_power_intrinsics.h | 9 ++ > .../include/generic/rte_power_intrinsics.h | 16 ++++ > .../ppc/include/rte_power_intrinsics.h | 9 ++ > lib/librte_eal/version.map | 1 + > lib/librte_eal/x86/rte_power_intrinsics.c | 85 +++++++++++++++++++ > 5 files changed, 120 insertions(+) > > diff --git a/lib/librte_eal/arm/include/rte_power_intrinsics.h > b/lib/librte_eal/arm/include/rte_power_intrinsics.h > index 27869251a8..39e49cc45b 100644 > --- a/lib/librte_eal/arm/include/rte_power_intrinsics.h > +++ b/lib/librte_eal/arm/include/rte_power_intrinsics.h > @@ -33,6 +33,15 @@ rte_power_pause(const uint64_t tsc_timestamp) > RTE_SET_USED(tsc_timestamp); > } > > +/** > + * This function is not supported on ARM. > + */ > +void > +rte_power_monitor_wakeup(const unsigned int lcore_id) > +{ > + RTE_SET_USED(lcore_id); > +} > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/librte_eal/include/generic/rte_power_intrinsics.h > b/lib/librte_eal/include/generic/rte_power_intrinsics.h > index a6f1955996..e311d6f8ea 100644 > --- a/lib/librte_eal/include/generic/rte_power_intrinsics.h > +++ b/lib/librte_eal/include/generic/rte_power_intrinsics.h > @@ -57,6 +57,22 @@ __rte_experimental > void rte_power_monitor(const struct rte_power_monitor_cond *pmc, > const uint64_t tsc_timestamp); > > +/** > + * @warning > + * @b EXPERIMENTAL: this API may change without prior notice > + * > + * Wake up a specific lcore that is in a power optimized state and is > monitoring > + * an address. 
> + * > + * @note This function will *not* wake up a core that is in a power optimized > + * state due to calling `rte_power_pause`. > + * > + * @param lcore_id > + * Lcore ID of a sleeping thread. > + */ > +__rte_experimental > +void rte_power_monitor_wakeup(const unsigned int lcore_id); > + > /** > * @warning > * @b EXPERIMENTAL: this API may change without prior notice > diff --git a/lib/librte_eal/ppc/include/rte_power_intrinsics.h > b/lib/librte_eal/ppc/include/rte_power_intrinsics.h > index 248d1f4a23..2e7db0e7eb 100644 > --- a/lib/librte_eal/ppc/include/rte_power_intrinsics.h > +++ b/lib/librte_eal/ppc/include/rte_power_intrinsics.h > @@ -33,6 +33,15 @@ rte_power_pause(const uint64_t tsc_timestamp) > RTE_SET_USED(tsc_timestamp); > } > > +/** > + * This function is not supported on PPC64. > + */ > +void > +rte_power_monitor_wakeup(const unsigned int lcore_id) > +{ > + RTE_SET_USED(lcore_id); > +} > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/librte_eal/version.map b/lib/librte_eal/version.map > index 20945b1efa..ac026e289d 100644 > --- a/lib/librte_eal/version.map > +++ b/lib/librte_eal/version.map > @@ -406,6 +406,7 @@ EXPERIMENTAL { > > # added in 21.02 > rte_power_monitor; > + rte_power_monitor_wakeup; > rte_power_pause; > }; > > diff --git a/lib/librte_eal/x86/rte_power_intrinsics.c > b/lib/librte_eal/x86/rte_power_intrinsics.c > index a9cd1afe9d..46a4fb6cd5 100644 > --- a/lib/librte_eal/x86/rte_power_intrinsics.c > +++ b/lib/librte_eal/x86/rte_power_intrinsics.c > @@ -2,8 +2,31 @@ > * Copyright(c) 2020 Intel Corporation > */ > > +#include <rte_common.h> > +#include <rte_lcore.h> > +#include <rte_spinlock.h> > + > #include "rte_power_intrinsics.h" > > +/* > + * Per-lcore structure holding current status of C0.2 sleeps. 
> + */ > +static struct power_wait_status { > + rte_spinlock_t lock; > + volatile void *monitor_addr; /**< NULL if not currently sleeping */ > +} __rte_cache_aligned wait_status[RTE_MAX_LCORE]; > + > +static inline void > +__umwait_wakeup(volatile void *addr) > +{ > + uint64_t val; > + > + /* trigger a write but don't change the value */ > + val = __atomic_load_n((volatile uint64_t *)addr, __ATOMIC_RELAXED); > + __atomic_compare_exchange_n((volatile uint64_t *)addr, &val, val, 0, > + __ATOMIC_RELAXED, __ATOMIC_RELAXED); > +} > + > static uint8_t wait_supported; > > static inline uint64_t > @@ -36,6 +59,12 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc, > { > const uint32_t tsc_l = (uint32_t)tsc_timestamp; > const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32); > + const unsigned int lcore_id = rte_lcore_id(); > + struct power_wait_status *s; > + > + /* prevent non-EAL thread from using this API */ > + if (lcore_id >= RTE_MAX_LCORE) > + return; > > /* prevent user from running this instruction if it's not supported */ > if (!wait_supported) > @@ -60,11 +89,24 @@ rte_power_monitor(const struct rte_power_monitor_cond > *pmc, > if (masked == pmc->val) > return; > } > + > + s = &wait_status[lcore_id]; > + > + /* update sleep address */ > + rte_spinlock_lock(&s->lock); > + s->monitor_addr = pmc->addr; > + rte_spinlock_unlock(&s->lock);
It has been a while since I last looked at this, but shouldn't we grab the lock before monitor()? I.e.: lock(); monitor(); addr=...; unlock(); umwait(); > + > /* execute UMWAIT */ > asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;" > : /* ignore rflags */ > : "D"(0), /* enter C0.2 */ > "a"(tsc_l), "d"(tsc_h)); > + > + /* erase sleep address */ > + rte_spinlock_lock(&s->lock); > + s->monitor_addr = NULL; > + rte_spinlock_unlock(&s->lock); > } > > /** > @@ -97,3 +139,46 @@ RTE_INIT(rte_power_intrinsics_init) { > if (i.power_monitor && i.power_pause) > wait_supported = 1; > } > + > +void > +rte_power_monitor_wakeup(const unsigned int lcore_id) > +{ > + struct power_wait_status *s; > + > + /* prevent buffer overrun */ > + if (lcore_id >= RTE_MAX_LCORE) > + return; > + > + /* prevent user from running this instruction if it's not supported */ > + if (!wait_supported) > + return; > + > + s = &wait_status[lcore_id]; > + > + /* > + * There is a race condition between sleep, wakeup and locking, but we > + * don't need to handle it. > + * > + * Possible situations: > + * > + * 1. T1 locks, sets address, unlocks > + * 2. T2 locks, triggers wakeup, unlocks > + * 3. T1 sleeps > + * > + * In this case, because T1 has already set the address for monitoring, > + * we will wake up immediately even if T2 triggers wakeup before T1 > + * goes to sleep. > + * > + * 1. T1 locks, sets address, unlocks, goes to sleep, and wakes up > + * 2. T2 locks, triggers wakeup, and unlocks > + * 3. T1 locks, erases address, and unlocks > + * > + * In this case, since we've already woken up, the "wakeup" was > + * unneeded, and since T1 is still waiting on T2 releasing the lock, the > + * wakeup address is still valid so it's perfectly safe to write it. > + */ > + rte_spinlock_lock(&s->lock); > + if (s->monitor_addr != NULL) > + __umwait_wakeup(s->monitor_addr); > + rte_spinlock_unlock(&s->lock); > +} > -- > 2.25.1