> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Remy Horton
> Sent: Wednesday, September 30, 2015 10:05 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2 1/3] rte: add keep alive functionality
> 
> Adds functions for detecting and reporting the live-ness of LCores, the
> primary requirement of which is minimal overheads for the
> core(s) being checked. Core failures are notified via an application defined
> callback.
> 
> Signed-off-by: Remy Horton <remy.horton at intel.com>
> ---
>  lib/librte_eal/bsdapp/eal/Makefile            |   1 +
>  lib/librte_eal/common/Makefile                |   2 +-
>  lib/librte_eal/common/include/rte_keepalive.h | 140
> ++++++++++++++++++++++++++
>  lib/librte_eal/common/rte_keepalive.c         | 122
> ++++++++++++++++++++++
>  lib/librte_eal/linuxapp/eal/Makefile          |   1 +
>  5 files changed, 265 insertions(+), 1 deletion(-)  create mode 100644
> lib/librte_eal/common/include/rte_keepalive.h
>  create mode 100644 lib/librte_eal/common/rte_keepalive.c
> 
> diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> b/lib/librte_eal/bsdapp/eal/Makefile
> index a49dcec..65b293f 100644
> --- a/lib/librte_eal/bsdapp/eal/Makefile
> +++ b/lib/librte_eal/bsdapp/eal/Makefile
> @@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) +=
> eal_common_thread.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
> +SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c
> 
>  CFLAGS_eal.o := -D_GNU_SOURCE
>  #CFLAGS_eal_thread.o := -D_GNU_SOURCE
> diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
> index 0c43d6a..7f1757a 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h  INC +=
> rte_eal_memconfig.h rte_malloc_heap.h  INC += rte_hexdump.h
> rte_devargs.h rte_dev.h  INC += rte_pci_dev_feature_defs.h
> rte_pci_dev_features.h -INC += rte_malloc.h
> +INC += rte_malloc.h rte_keepalive.h
> 
>  ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
>  INC += rte_warnings.h
> diff --git a/lib/librte_eal/common/include/rte_keepalive.h
> b/lib/librte_eal/common/include/rte_keepalive.h
> new file mode 100644
> index 0000000..d67bf4b
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_keepalive.h
> @@ -0,0 +1,140 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +/**
> + * @file keepalive.h
> + * DPDK RTE LCore Keepalive Monitor.
> + *
> + **/
> +
> +#ifndef _KEEPALIVE_H_
> +#define _KEEPALIVE_H_
> +
> +#include <rte_memory.h>
> +
> +#ifndef RTE_KEEPALIVE_MAXCORES
> +/**
> + * Number of cores to track.
> + * @note Must be larger than the highest core id. */ #define
> +RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE #endif
> +
> +
> +/**
> + * Keepalive failure callback.
> + *
> + *  Receives a data pointer passed to rte_keepalive_create() and the id
> +of the
> + *  failed core.
> + */
> +typedef void (*rte_keepalive_failure_callback_t)(
> +     void *data,
> +     const int id_core);
> +
> +
> +/**
> + * Keepalive state structure.
> + * @internal
> + */
> +struct rte_keepalive {
> +     /** Core Liveness. */
> +     uint32_t __rte_cache_aligned
> state_flags[RTE_KEEPALIVE_MAXCORES];
> +
> +     /** Last-seen-alive timestamps */
> +     uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
> +
> +     /**
> +      * Cores to check.
> +      * Indexed by core id, non-zero if the core should be checked.
> +      */
> +     uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
> +
> +     /** Dead core handler. */
> +     rte_keepalive_failure_callback_t callback;
> +
> +     /**
> +      * Dead core handler app data.
> +      * Pointer is passed to dead core handler.
> +      */
> +     void *callback_data;
> +     uint64_t tsc_initial;
> +     uint64_t tsc_mhz;
> +};
> +
> +
> +/**
> + * Initialise keepalive sub-system.
> + * @param callback
> + *   Function called upon detection of a dead core.
> + * @param data
> + *   Data pointer to be passed to function callback.
> + * @return
> + *   Keepalive structure success, NULL on failure.
> + */
> +struct rte_keepalive *rte_keepalive_create(
> +     rte_keepalive_failure_callback_t callback,
> +     void *data);
> +
> +
> +/**
> + * @param *ptr_timer Triggering timer (unused)
> + * @param *ptr_data  Data pointer (keepalive structure)  */ void
> +rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
> +     void *ptr_data);
> +
> +
> +/**
> + * Registers a core for keepalive checks.
> + * @param *keepcfg
> + *   Keepalive structure pointer
> + * @param id_core
> + *   ID number of core to register.
> + */
> +void rte_keepalive_register_core(struct rte_keepalive *keepcfg, const
> +int id_core);
> +
> +
> +/**
> + * Per-core keepalive check.
> + * @param *keepcfg
> + *   Keepalive structure pointer
> + *
> + * This function needs to be called from within the main process loop
> +of
> + * the LCore to be checked.
> + */
> +static inline void
> +rte_keepalive_mark_alive(struct rte_keepalive *keepcfg) {
> +     keepcfg->state_flags[rte_lcore_id()] = 1; }
> +
> +
> +#endif /* _KEEPALIVE_H_ */
> diff --git a/lib/librte_eal/common/rte_keepalive.c
> b/lib/librte_eal/common/rte_keepalive.c
> new file mode 100644
> index 0000000..cbdd801
> --- /dev/null
> +++ b/lib/librte_eal/common/rte_keepalive.c
> @@ -0,0 +1,122 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include <inttypes.h>
> +
> +#include <rte_cycles.h>
> +#include <rte_lcore.h>
> +#include <rte_keepalive.h>
> +
> +#ifdef KEEPALIVE_DEBUG_MSGS
> +static void
> +print_trace(const char *msg, struct rte_keepalive *keepcfg, int
> +idx_core) {
> +     printf("%sLast seen %" PRId64  "ms ago.\n",
> +             msg,
> +             ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
> +             / rte_get_tsc_hz()
> +           );

Hi Remy
Looks great overall, should this be an RTE_LOG message rather than a printf?

BR 
Maryam
> +}
> +#else
> +static void
> +print_trace(__attribute__((unused)) const char *msg,
> +     __attribute__((unused)) struct rte_keepalive *keepcfg,
> +     __attribute__((unused)) int idx_core)
> +{
> +}
> +#endif
> +
> +
> +
> +void
> +rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
> +     void *ptr_data)
> +{
> +     struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;
> +     int idx_core;
> +
> +     for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
> idx_core++) {
> +             if (keepcfg->active_cores[idx_core] == 0)
> +                     continue;
> +             switch (keepcfg->state_flags[idx_core]) {
> +             case 1: /* Alive */
> +                     keepcfg->state_flags[idx_core] = 0;
> +                     keepcfg->last_alive[idx_core] = rte_rdtsc();
> +                     break;
> +             case 0: /* MIA */
> +                     print_trace("Core MIA. ", keepcfg, idx_core);
> +                     keepcfg->state_flags[idx_core] = 2;
> +                     break;
> +             case 2: /* Dead */
> +                     keepcfg->state_flags[idx_core] = 3;
> +                     print_trace("Core died. ", keepcfg, idx_core);
> +                     if (keepcfg->callback)
> +                             keepcfg->callback(
> +                                     keepcfg->callback_data,
> +                                     idx_core
> +                                     );
> +                     break;
> +             case 3: /* Buried */
> +                     break;
> +             }
> +     }
> +}
> +
> +
> +struct rte_keepalive *
> +rte_keepalive_create(rte_keepalive_failure_callback_t callback,
> +     void *data)
> +{
> +     int idx_core;
> +     struct rte_keepalive *keepcfg;
> +
> +     keepcfg = malloc(sizeof(struct rte_keepalive));
> +     if (keepcfg != NULL) {
> +             for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
> idx_core++) {
> +                     keepcfg->state_flags[idx_core] = 0;
> +                     keepcfg->active_cores[idx_core] = 0;
> +             }
> +             keepcfg->callback = callback;
> +             keepcfg->callback_data = data;
> +             keepcfg->tsc_initial = rte_rdtsc();
> +             keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
> +     }
> +     return keepcfg;
> +}
> +
> +
> +void
> +rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int
> +id_core) {
> +     if (id_core < RTE_KEEPALIVE_MAXCORES)
> +             keepcfg->active_cores[id_core] = 1;
> +}
> diff --git a/lib/librte_eal/linuxapp/eal/Makefile
> b/lib/librte_eal/linuxapp/eal/Makefile
> index d62196e..05a44d7 100644
> --- a/lib/librte_eal/linuxapp/eal/Makefile
> +++ b/lib/librte_eal/linuxapp/eal/Makefile
> @@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) +=
> eal_common_thread.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
> +SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c
> 
>  CFLAGS_eal.o := -D_GNU_SOURCE
>  CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
> --
> 1.9.3

Reply via email to