> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Remy Horton > Sent: Wednesday, September 30, 2015 10:05 AM > To: dev at dpdk.org > Subject: [dpdk-dev] [PATCH v2 1/3] rte: add keep alive functionality > > Adds functions for detecting and reporting the live-ness of LCores, the > primary requirement of which is minimal overheads for the > core(s) being checked. Core failures are notified via an application defined > callback. > > Signed-off-by: Remy Horton <remy.horton at intel.com> > --- > lib/librte_eal/bsdapp/eal/Makefile | 1 + > lib/librte_eal/common/Makefile | 2 +- > lib/librte_eal/common/include/rte_keepalive.h | 140 > ++++++++++++++++++++++++++ > lib/librte_eal/common/rte_keepalive.c | 122 > ++++++++++++++++++++++ > lib/librte_eal/linuxapp/eal/Makefile | 1 + > 5 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 > lib/librte_eal/common/include/rte_keepalive.h > create mode 100644 lib/librte_eal/common/rte_keepalive.c > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile > b/lib/librte_eal/bsdapp/eal/Makefile > index a49dcec..65b293f 100644 > --- a/lib/librte_eal/bsdapp/eal/Makefile > +++ b/lib/librte_eal/bsdapp/eal/Makefile > @@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += > eal_common_thread.c > SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c > SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c > SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c > +SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c > > CFLAGS_eal.o := -D_GNU_SOURCE > #CFLAGS_eal_thread.o := -D_GNU_SOURCE > diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile > index 0c43d6a..7f1757a 100644 > --- a/lib/librte_eal/common/Makefile > +++ b/lib/librte_eal/common/Makefile > @@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h INC += > rte_eal_memconfig.h rte_malloc_heap.h INC += rte_hexdump.h > rte_devargs.h rte_dev.h INC += rte_pci_dev_feature_defs.h > rte_pci_dev_features.h -INC += rte_malloc.h > +INC += rte_malloc.h 
rte_keepalive.h > > ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y) > INC += rte_warnings.h > diff --git a/lib/librte_eal/common/include/rte_keepalive.h > b/lib/librte_eal/common/include/rte_keepalive.h > new file mode 100644 > index 0000000..d67bf4b > --- /dev/null > +++ b/lib/librte_eal/common/include/rte_keepalive.h > @@ -0,0 +1,140 @@ > +/*- > + * BSD LICENSE > + * > + * Copyright 2015 Intel Shannon Ltd. All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Intel Corporation nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND > CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND > FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE > COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, > INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS > OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND > ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF > THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH > DAMAGE. > + */ > + > +/** > + * @file keepalive.h > + * DPDK RTE LCore Keepalive Monitor. > + * > + **/ > + > +#ifndef _KEEPALIVE_H_ > +#define _KEEPALIVE_H_ > + > +#include <rte_memory.h> > + > +#ifndef RTE_KEEPALIVE_MAXCORES > +/** > + * Number of cores to track. > + * @note Must be larger than the highest core id. */ #define > +RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE #endif > + > + > +/** > + * Keepalive failure callback. > + * > + * Receives a data pointer passed to rte_keepalive_create() and the id > +of the > + * failed core. > + */ > +typedef void (*rte_keepalive_failure_callback_t)( > + void *data, > + const int id_core); > + > + > +/** > + * Keepalive state structure. > + * @internal > + */ > +struct rte_keepalive { > + /** Core Liveness. */ > + uint32_t __rte_cache_aligned > state_flags[RTE_KEEPALIVE_MAXCORES]; > + > + /** Last-seen-alive timestamps */ > + uint64_t last_alive[RTE_KEEPALIVE_MAXCORES]; > + > + /** > + * Cores to check. > + * Indexed by core id, non-zero if the core should be checked. > + */ > + uint8_t active_cores[RTE_KEEPALIVE_MAXCORES]; > + > + /** Dead core handler. */ > + rte_keepalive_failure_callback_t callback; > + > + /** > + * Dead core handler app data. > + * Pointer is passed to dead core handler. > + */ > + void *callback_data; > + uint64_t tsc_initial; > + uint64_t tsc_mhz; > +}; > + > + > +/** > + * Initialise keepalive sub-system. 
> + * @param callback > + * Function called upon detection of a dead core. > + * @param data > + * Data pointer to be passed to function callback. > + * @return > + * Keepalive structure success, NULL on failure. > + */ > +struct rte_keepalive *rte_keepalive_create( > + rte_keepalive_failure_callback_t callback, > + void *data); > + > + > +/** > + * @param *ptr_timer Triggering timer (unused) > + * @param *ptr_data Data pointer (keepalive structure) */ void > +rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer, > + void *ptr_data); > + > + > +/** > + * Registers a core for keepalive checks. > + * @param *keepcfg > + * Keepalive structure pointer > + * @param id_core > + * ID number of core to register. > + */ > +void rte_keepalive_register_core(struct rte_keepalive *keepcfg, const > +int id_core); > + > + > +/** > + * Per-core keepalive check. > + * @param *keepcfg > + * Keepalive structure pointer > + * > + * This function needs to be called from within the main process loop > +of > + * the LCore to be checked. > + */ > +static inline void > +rte_keepalive_mark_alive(struct rte_keepalive *keepcfg) { > + keepcfg->state_flags[rte_lcore_id()] = 1; } > + > + > +#endif /* _KEEPALIVE_H_ */ > diff --git a/lib/librte_eal/common/rte_keepalive.c > b/lib/librte_eal/common/rte_keepalive.c > new file mode 100644 > index 0000000..cbdd801 > --- /dev/null > +++ b/lib/librte_eal/common/rte_keepalive.c > @@ -0,0 +1,122 @@ > +/*- > + * BSD LICENSE > + * > + * Copyright 2015 Intel Shannon Ltd. All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. 
> + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Intel Corporation nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND > CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND > FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE > COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, > INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS > OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND > ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF > THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH > DAMAGE. > + */ > + > +#include <inttypes.h> > + > +#include <rte_cycles.h> > +#include <rte_lcore.h> > +#include <rte_keepalive.h> > + > +#ifdef KEEPALIVE_DEBUG_MSGS > +static void > +print_trace(const char *msg, struct rte_keepalive *keepcfg, int > +idx_core) { > + printf("%sLast seen %" PRId64 "ms ago.\n", > + msg, > + ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000) > + / rte_get_tsc_hz() > + );
Hi Remy, Looks great overall. Should this be an RTE_LOG message rather than a printf? BR, Maryam > +} > +#else > +static void > +print_trace(__attribute__((unused)) const char *msg, > + __attribute__((unused)) struct rte_keepalive *keepcfg, > + __attribute__((unused)) int idx_core) > +{ > +} > +#endif > + > + > + > +void > +rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer, > + void *ptr_data) > +{ > + struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data; > + int idx_core; > + > + for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; > idx_core++) { > + if (keepcfg->active_cores[idx_core] == 0) > + continue; > + switch (keepcfg->state_flags[idx_core]) { > + case 1: /* Alive */ > + keepcfg->state_flags[idx_core] = 0; > + keepcfg->last_alive[idx_core] = rte_rdtsc(); > + break; > + case 0: /* MIA */ > + print_trace("Core MIA. ", keepcfg, idx_core); > + keepcfg->state_flags[idx_core] = 2; > + break; > + case 2: /* Dead */ > + keepcfg->state_flags[idx_core] = 3; > + print_trace("Core died. 
", keepcfg, idx_core); > + if (keepcfg->callback) > + keepcfg->callback( > + keepcfg->callback_data, > + idx_core > + ); > + break; > + case 3: /* Buried */ > + break; > + } > + } > +} > + > + > +struct rte_keepalive * > +rte_keepalive_create(rte_keepalive_failure_callback_t callback, > + void *data) > +{ > + int idx_core; > + struct rte_keepalive *keepcfg; > + > + keepcfg = malloc(sizeof(struct rte_keepalive)); > + if (keepcfg != NULL) { > + for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; > idx_core++) { > + keepcfg->state_flags[idx_core] = 0; > + keepcfg->active_cores[idx_core] = 0; > + } > + keepcfg->callback = callback; > + keepcfg->callback_data = data; > + keepcfg->tsc_initial = rte_rdtsc(); > + keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000; > + } > + return keepcfg; > +} > + > + > +void > +rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int > +id_core) { > + if (id_core < RTE_KEEPALIVE_MAXCORES) > + keepcfg->active_cores[id_core] = 1; > +} > diff --git a/lib/librte_eal/linuxapp/eal/Makefile > b/lib/librte_eal/linuxapp/eal/Makefile > index d62196e..05a44d7 100644 > --- a/lib/librte_eal/linuxapp/eal/Makefile > +++ b/lib/librte_eal/linuxapp/eal/Makefile > @@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += > eal_common_thread.c > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c > SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c > +SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c > > CFLAGS_eal.o := -D_GNU_SOURCE > CFLAGS_eal_interrupts.o := -D_GNU_SOURCE > -- > 1.9.3