Since DPDK 23.03, it is possible to register a callback that reports per-lcore TSC cycle usage. Reuse the busy/idle cycle accounting already done in dpif-netdev and expose it through the DPDK telemetry socket.
Upon dpdk_attach_thread, record the mapping between the DPDK lcore_id and the dpif-netdev core_id. Reuse that mapping in the lcore usage callback to invoke dpif_netdev_get_pmd_cycles. Here is an example output: ~# ovs-appctl dpif-netdev/pmd-stats-show | grep -e ^pmd -e cycles: pmd thread numa_id 0 core_id 8: idle cycles: 2720796781680 (100.00%) processing cycles: 3566020 (0.00%) pmd thread numa_id 0 core_id 9: idle cycles: 2718974371440 (100.00%) processing cycles: 3136840 (0.00%) pmd thread numa_id 0 core_id 72: pmd thread numa_id 0 core_id 73: ~# echo /eal/lcore/usage | dpdk-telemetry.py | jq { "/eal/lcore/usage": { "lcore_ids": [ 3, 5, 11, 15 ], "total_cycles": [ 2725722342740, 2725722347480, 2723899464040, 2725722354980 ], "busy_cycles": [ 3566020, 3566020, 3136840, 3566020 ] } } Link: https://git.dpdk.org/dpdk/commit/?id=9ab1804922ba583b0b16 Cc: David Marchand <david.march...@redhat.com> Cc: Kevin Traynor <ktray...@redhat.com> Signed-off-by: Robin Jarry <rja...@redhat.com> --- lib/dpdk-stub.c | 5 +++ lib/dpdk.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++- lib/dpdk.h | 5 +++ lib/dpif-netdev.c | 38 +++++++++++++++++++ 4 files changed, 142 insertions(+), 1 deletion(-) diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c index 58ebf6cb62cd..02fb561bea7b 100644 --- a/lib/dpdk-stub.c +++ b/lib/dpdk-stub.c @@ -49,6 +49,11 @@ dpdk_detach_thread(void) { } +void +dpdk_register_core_usage_callback(dpdk_core_usage_cb_t *cb OVS_UNUSED) +{ +} + bool dpdk_available(void) { diff --git a/lib/dpdk.c b/lib/dpdk.c index d76d53f8f16c..31871300f719 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -23,6 +23,7 @@ #include <rte_cpuflags.h> #include <rte_errno.h> +#include <rte_lcore.h> #include <rte_log.h> #include <rte_malloc.h> #include <rte_memzone.h> @@ -310,6 +311,10 @@ malloc_dump_stats_wrapper(FILE *stream) rte_malloc_dump_stats(stream, NULL); } +#ifdef ALLOW_EXPERIMENTAL_API +static int dpdk_get_lcore_cycles(unsigned int, struct rte_lcore_usage *); +#endif + static bool 
dpdk_init__(const struct smap *ovs_other_config) { @@ -440,6 +445,10 @@ dpdk_init__(const struct smap *ovs_other_config) /* We are called from the main thread here */ RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; +#ifdef ALLOW_EXPERIMENTAL_API + rte_lcore_register_usage_cb(dpdk_get_lcore_cycles); +#endif + /* Finally, register the dpdk classes */ netdev_dpdk_register(ovs_other_config); netdev_register_flow_api_provider(&netdev_offload_dpdk); @@ -490,9 +499,52 @@ dpdk_available(void) return initialized; } +struct lcore_id_map { + unsigned int lcore_id; + unsigned int pmd_core_id; +}; + +/* Protects against changes to 'lcore_id_maps'. */ +struct ovs_mutex lcore_id_maps_mutex = OVS_MUTEX_INITIALIZER; + +/* Contains all 'struct lcore_id_map's. */ +static struct shash lcore_id_maps OVS_GUARDED_BY(lcore_id_maps_mutex) + = SHASH_INITIALIZER(&lcore_id_maps); + +static void +lcore_id_to_str(char *buf, size_t len, unsigned int lcore_id) +{ + int n; + + n = snprintf(buf, len, "%u", lcore_id); + if (n < 0) { + VLOG_WARN("Failed to format lcore_id: %s", ovs_strerror(errno)); + n = 0; + } + buf[n] = '\0'; +} + +static void +lcore_id_map_update(unsigned int lcore_id, unsigned int cpu, bool add) +{ + char buf[128]; + + lcore_id_to_str(buf, sizeof buf, lcore_id); + + ovs_mutex_lock(&lcore_id_maps_mutex); + if (add) { + shash_replace(&lcore_id_maps, buf, (void *) (uintptr_t) cpu); + } else { + shash_find_and_delete(&lcore_id_maps, buf); + } + ovs_mutex_unlock(&lcore_id_maps_mutex); +} + bool dpdk_attach_thread(unsigned cpu) { + unsigned int lcore_id; + /* NON_PMD_CORE_ID is reserved for use by non pmd threads. 
*/ ovs_assert(cpu != NON_PMD_CORE_ID); @@ -506,7 +558,9 @@ dpdk_attach_thread(unsigned cpu) return false; } - VLOG_INFO("PMD thread uses DPDK lcore %u.", rte_lcore_id()); + lcore_id = rte_lcore_id(); + lcore_id_map_update(lcore_id, cpu, true); + VLOG_INFO("PMD thread uses DPDK lcore %u.", lcore_id); return true; } @@ -516,10 +570,49 @@ dpdk_detach_thread(void) unsigned int lcore_id; lcore_id = rte_lcore_id(); + lcore_id_map_update(lcore_id, 0, false); + rte_thread_unregister(); VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id); } +static dpdk_core_usage_cb_t *core_usage_cb; + +void +dpdk_register_core_usage_callback(dpdk_core_usage_cb_t *cb) +{ + core_usage_cb = cb; +} + +#ifdef ALLOW_EXPERIMENTAL_API +static int +dpdk_get_lcore_cycles(unsigned int lcore_id, struct rte_lcore_usage *usage) +{ + struct shash_node *node; + unsigned int core_id; + char buf[128]; + + if (!core_usage_cb) { + return -1; + } + + lcore_id_to_str(buf, sizeof buf, lcore_id); + + ovs_mutex_lock(&lcore_id_maps_mutex); + node = shash_find(&lcore_id_maps, buf); + ovs_mutex_unlock(&lcore_id_maps_mutex); + + if (!node) { + return -1; + } + + core_id = (unsigned int) (uintptr_t) node->data; + core_usage_cb(core_id, &usage->busy_cycles, &usage->total_cycles); + + return 0; +} +#endif + void print_dpdk_version(void) { diff --git a/lib/dpdk.h b/lib/dpdk.h index 1b790e682e4d..95a51698c74d 100644 --- a/lib/dpdk.h +++ b/lib/dpdk.h @@ -18,6 +18,7 @@ #define DPDK_H #include <stdbool.h> +#include <stdint.h> #ifdef DPDK_NETDEV @@ -42,4 +43,8 @@ bool dpdk_available(void); void print_dpdk_version(void); void dpdk_status(const struct ovsrec_open_vswitch *); +typedef void (dpdk_core_usage_cb_t)(unsigned cpu, uint64_t *busy_cycles, + uint64_t *total_cycles); +void dpdk_register_core_usage_callback(dpdk_core_usage_cb_t *cb); + #endif /* dpdk.h */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 70b953ae6dd3..ebf43a0f62e4 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1427,6 +1427,41 @@ 
dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc, ds_destroy(&reply); } +static void +dpif_netdev_get_pmd_cycles(unsigned int core_id, + uint64_t *busy_cycles, uint64_t *total_cycles) +{ + struct dp_netdev_pmd_thread **pmd_list = NULL; + uint64_t stats[PMD_N_STATS]; + struct dp_netdev *dp; + size_t num_pmds; + + ovs_mutex_lock(&dp_netdev_mutex); + + if (shash_count(&dp_netdevs) != 1) { + goto out; + } + + dp = shash_first(&dp_netdevs)->data; + sorted_poll_thread_list(dp, &pmd_list, &num_pmds); + + for (size_t i = 0; i < num_pmds; i++) { + struct dp_netdev_pmd_thread *pmd = pmd_list[i]; + + if (pmd->core_id != core_id) { + continue; + } + pmd_perf_read_counters(&pmd->perf_stats, stats); + *busy_cycles = stats[PMD_CYCLES_ITER_BUSY]; + *total_cycles = *busy_cycles + stats[PMD_CYCLES_ITER_IDLE]; + break; + } + +out: + free(pmd_list); + ovs_mutex_unlock(&dp_netdev_mutex); +} + static void dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[], void *aux) @@ -1661,6 +1696,9 @@ dpif_netdev_init(void) unixctl_command_register("dpif-netdev/miniflow-parser-get", "", 0, 0, dpif_miniflow_extract_impl_get, NULL); + + dpdk_register_core_usage_callback(dpif_netdev_get_pmd_cycles); + return 0; } -- 2.41.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev