MWAITX allows EPYC processors to enter an implementation-dependent power/performance-optimized state (C1 state) for a specific period, or until a store to the monitored address range occurs.
Signed-off-by: Sivaprasad Tummala <sivaprasad.tumm...@amd.com> --- lib/eal/x86/rte_power_intrinsics.c | 83 ++++++++++++++++++++++++++++-- lib/power/rte_power_pmd_mgmt.c | 3 +- 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c index f749da9b85..d688066b3a 100644 --- a/lib/eal/x86/rte_power_intrinsics.c +++ b/lib/eal/x86/rte_power_intrinsics.c @@ -30,6 +30,7 @@ __umwait_wakeup(volatile void *addr) static bool wait_supported; static bool wait_multi_supported; +static bool amd_mwaitx_supported; static inline uint64_t __get_umwait_val(const volatile void *p, const uint8_t sz) @@ -65,6 +66,76 @@ __check_val_size(const uint8_t sz) } } +/** + * This function uses MONITORX/MWAITX instructions and will enter C1 state. + * For more information about usage of these instructions, please refer to + * AMD64 Architecture Programmer’s Manual. + */ +static inline int +amd_power_monitorx(const struct rte_power_monitor_cond *pmc, + const uint64_t tsc_timestamp) +{ + const unsigned int lcore_id = rte_lcore_id(); + struct power_wait_status *s; + uint64_t cur_value; + + RTE_SET_USED(tsc_timestamp); + + /* prevent non-EAL thread from using this API */ + if (lcore_id >= RTE_MAX_LCORE) + return -EINVAL; + + if (pmc == NULL) + return -EINVAL; + + if (__check_val_size(pmc->size) < 0) + return -EINVAL; + + if (pmc->fn == NULL) + return -EINVAL; + + s = &wait_status[lcore_id]; + + /* update sleep address */ + rte_spinlock_lock(&s->lock); + s->monitor_addr = pmc->addr; + + /* + * we're using raw byte codes for now as only the newest compiler + * versions support this instruction natively. 
+ */ + /* set address for MONITORX */ + asm volatile(".byte 0x0f, 0x01, 0xfa;" + : + : "a"(pmc->addr), + "c"(0), /* no extensions */ + "d"(0)); /* no hints */ + + /* now that we've put this address into monitor, we can unlock */ + rte_spinlock_unlock(&s->lock); + + cur_value = __get_umwait_val(pmc->addr, pmc->size); + + /* check if callback indicates we should abort */ + if (pmc->fn(cur_value, pmc->opaque) != 0) + goto end; + + /* execute MWAITX */ + asm volatile(".byte 0x0f, 0x01, 0xfb;" + : /* ignore rflags */ + : "a"(0), /* enter C1 */ + "c"(0), /* no time-out */ + "b"(0)); + +end: + /* erase sleep address */ + rte_spinlock_lock(&s->lock); + s->monitor_addr = NULL; + rte_spinlock_unlock(&s->lock); + + return 0; +} + /** * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state. * For more information about usage of these instructions, please refer to @@ -81,8 +152,12 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc, uint64_t cur_value; /* prevent user from running this instruction if it's not supported */ - if (!wait_supported) - return -ENOTSUP; + if (!wait_supported) { + if (amd_mwaitx_supported) + return amd_power_monitorx(pmc, tsc_timestamp); + else + return -ENOTSUP; + } /* prevent non-EAL thread from using this API */ if (lcore_id >= RTE_MAX_LCORE) @@ -170,6 +245,8 @@ RTE_INIT(rte_power_intrinsics_init) { wait_supported = 1; if (i.power_monitor_multi) wait_multi_supported = 1; + if (i.amd_power_monitorx) + amd_mwaitx_supported = 1; } int @@ -178,7 +255,7 @@ rte_power_monitor_wakeup(const unsigned int lcore_id) struct power_wait_status *s; /* prevent user from running this instruction if it's not supported */ - if (!wait_supported) + if (!wait_supported && !amd_mwaitx_supported) return -ENOTSUP; /* prevent buffer overrun */ diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c index ca1840387c..048a41dc29 100644 --- a/lib/power/rte_power_pmd_mgmt.c +++ b/lib/power/rte_power_pmd_mgmt.c @@ -447,7 +447,8 @@ 
check_monitor(struct pmd_core_cfg *cfg, const union queue *qdata) bool multimonitor_supported; /* check if rte_power_monitor is supported */ - if (!global_data.intrinsics_support.power_monitor) { + if ((!global_data.intrinsics_support.power_monitor) && + (!global_data.intrinsics_support.amd_power_monitorx)) { RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not supported\n"); return -ENOTSUP; } -- 2.34.1