Adds functions for detecting and reporting the liveness of LCores,
the primary requirement of which is minimal overhead for the
core(s) being checked. Core failures are reported via an
application-defined callback.

Signed-off-by: Remy Horton <remy.horton at intel.com>
---
 lib/librte_eal/bsdapp/eal/Makefile            |   1 +
 lib/librte_eal/common/Makefile                |   2 +-
 lib/librte_eal/common/include/rte_keepalive.h | 146 ++++++++++++++++++++++++++
 lib/librte_eal/common/rte_keepalive.c         | 124 ++++++++++++++++++++++
 lib/librte_eal/linuxapp/eal/Makefile          |   1 +
 5 files changed, 273 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_eal/common/include/rte_keepalive.h
 create mode 100644 lib/librte_eal/common/rte_keepalive.c

diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index a49dcec..65b293f 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c

 CFLAGS_eal.o := -D_GNU_SOURCE
 #CFLAGS_eal_thread.o := -D_GNU_SOURCE
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 0c43d6a..7f1757a 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h
 INC += rte_eal_memconfig.h rte_malloc_heap.h
 INC += rte_hexdump.h rte_devargs.h rte_dev.h
 INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
-INC += rte_malloc.h
+INC += rte_malloc.h rte_keepalive.h

 ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
 INC += rte_warnings.h
diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h
new file mode 100644
index 0000000..eab7255
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_keepalive.h
@@ -0,0 +1,146 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file rte_keepalive.h
+ * DPDK RTE LCore Keepalive Monitor.
+ *
+ **/
+
+#ifndef _KEEPALIVE_H_
+#define _KEEPALIVE_H_
+
+#include <stdint.h>
+
+/* rte_lcore_id() is used by the inline rte_keepalive_mark_alive() below. */
+#include <rte_lcore.h>
+#include <rte_memory.h>
+
+#ifndef RTE_KEEPALIVE_MAXCORES
+/**
+ * Number of cores to track.
+ * May be overridden by defining it before this header is included.
+ * @note Must be larger than the highest core id.
+ */
+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
+#endif
+
+
+/**
+ * Keepalive failure callback.
+ *
+ *  Receives a data pointer passed to rte_keepalive_create() and the id of the
+ *  failed core. It is invoked from rte_keepalive_dispatch_pings() when a
+ *  registered core moves from the DEAD state to the GONE state.
+ *
+ * @param data
+ *   Application data pointer that was supplied to rte_keepalive_create().
+ * @param id_core
+ *   ID number of the core that failed.
+ */
+typedef void (*rte_keepalive_failure_callback_t)(
+       void *data,
+       const int id_core);
+
+
+/**
+ * Keepalive state structure.
+ * @internal
+ *
+ * The per-core arrays below are indexed by lcore id.
+ */
+struct rte_keepalive {
+       /**
+        * Core Liveness.
+        * One state per core; a core that never marks itself alive is
+        * stepped MISSING -> DEAD -> GONE by rte_keepalive_dispatch_pings().
+        */
+
+       enum {
+               ALIVE = 1, /**< Stored by rte_keepalive_mark_alive(). */
+               MISSING = 0, /**< Initial value; also stored by the
+                             *   dispatcher after it observes ALIVE. */
+               DEAD = 2, /**< Stored by the dispatcher when it finds
+                          *   the core still MISSING. */
+               GONE = 3 /**< Stored by the dispatcher when it finds the
+                         *   core DEAD; the failure callback is invoked
+                         *   on this transition and the state is then
+                         *   left unchanged by later dispatches. */
+       } __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
+
+       /**
+        * Last-seen-alive timestamps.
+        * rte_rdtsc() value recorded by the dispatcher each time it
+        * observes the core in the ALIVE state.
+        */
+       uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
+
+       /**
+        * Cores to check.
+        * Indexed by core id, non-zero if the core should be checked.
+        * Set by rte_keepalive_register_core().
+        */
+       uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
+
+       /** Dead core handler. May be NULL, in which case it is skipped. */
+       rte_keepalive_failure_callback_t callback;
+
+       /**
+        * Dead core handler app data.
+        * Pointer is passed to dead core handler.
+        */
+       void *callback_data;
+       uint64_t tsc_initial; /**< rte_rdtsc() value sampled in
+                              *   rte_keepalive_create(). */
+       uint64_t tsc_mhz; /**< Set to rte_get_tsc_hz() / 1000 in
+                          *   rte_keepalive_create().
+                          *   NOTE(review): despite the name this looks
+                          *   like TSC cycles per millisecond (kHz), not
+                          *   MHz - confirm intent. */
+};
+
+
+/**
+ * Initialise keepalive sub-system.
+ *
+ * Allocates a keepalive structure with no cores registered; cores to be
+ * checked are added afterwards with rte_keepalive_register_core().
+ * @param callback
+ *   Function called upon detection of a dead core. May be NULL, in which
+ *   case no function is called.
+ * @param data
+ *   Data pointer to be passed to function callback.
+ * @return
+ *   Keepalive structure on success, NULL on failure.
+ */
+struct rte_keepalive *rte_keepalive_create(
+       rte_keepalive_failure_callback_t callback,
+       void *data);
+
+
+/**
+ * Checks the liveness state of every registered core.
+ *
+ * A core found ALIVE is reset to MISSING and its last-alive timestamp is
+ * refreshed. A core that has not called rte_keepalive_mark_alive() since
+ * then is stepped MISSING -> DEAD -> GONE on successive calls, and the
+ * failure callback is invoked on the DEAD -> GONE transition.
+ *
+ * NOTE(review): the (timer, data) signature suggests this is meant to be
+ * installed as a periodic timer callback - confirm against the caller.
+ *
+ * @param ptr_timer Triggering timer (unused)
+ * @param ptr_data  Data pointer (keepalive structure)
+ */
+void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data);
+
+
+/**
+ * Registers a core for keepalive checks.
+ * @param keepcfg
+ *   Keepalive structure pointer
+ * @param id_core
+ *   ID number of core to register. Values of RTE_KEEPALIVE_MAXCORES or
+ *   above are silently ignored.
+ */
+void rte_keepalive_register_core(struct rte_keepalive *keepcfg,
+       const int id_core);
+
+
+/**
+ * Per-core keepalive check.
+ * @param keepcfg
+ *   Keepalive structure pointer
+ *
+ * This function needs to be called from within the main process loop of
+ * the LCore to be checked. It performs a single store of the ALIVE state
+ * into the calling lcore's slot.
+ *
+ * @note The slot is selected with rte_lcore_id() and is not bounds-checked
+ *   here; the calling lcore's id must be below RTE_KEEPALIVE_MAXCORES.
+ */
+static inline void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
+{
+       /* Use the named state rather than its numeric value. */
+       keepcfg->state_flags[rte_lcore_id()] = ALIVE;
+}
+
+
+#endif /* _KEEPALIVE_H_ */
diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c
new file mode 100644
index 0000000..6a54f20
--- /dev/null
+++ b/lib/librte_eal/common/rte_keepalive.c
@@ -0,0 +1,124 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_keepalive.h>
+
+/*
+ * Debug trace helper.
+ *
+ * When KEEPALIVE_DEBUG_MSGS is defined, prints msg followed by the time in
+ * milliseconds since the dispatcher last recorded the core as alive.
+ * Otherwise it is an empty function and all arguments are unused.
+ */
+#ifdef KEEPALIVE_DEBUG_MSGS
+static void
+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
+{
+       /* The elapsed time is an unsigned 64-bit quantity, hence PRIu64. */
+       printf("%sLast seen %" PRIu64 "ms ago.\n",
+               msg,
+               ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
+               / rte_get_tsc_hz()
+             );
+}
+#else
+static void
+print_trace(__attribute__((unused)) const char *msg,
+       __attribute__((unused)) struct rte_keepalive *keepcfg,
+       __attribute__((unused)) int idx_core)
+{
+}
+#endif
+
+
+
+/*
+ * Walks every registered core and advances its liveness state:
+ * ALIVE -> MISSING (timestamp refreshed), MISSING -> DEAD,
+ * DEAD -> GONE (failure callback invoked, if one is set).
+ * Cores in the GONE state are left untouched.
+ *
+ * ptr_timer is unused; ptr_data is the struct rte_keepalive to check.
+ */
+void
+rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
+       void *ptr_data)
+{
+       struct rte_keepalive *keepcfg = ptr_data;
+       int idx_core;
+
+       for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+               /* Only cores added via rte_keepalive_register_core(). */
+               if (keepcfg->active_cores[idx_core] == 0)
+                       continue;
+               switch (keepcfg->state_flags[idx_core]) {
+               case ALIVE: /* Alive */
+                       /* Re-arm: the core has to mark itself again. */
+                       keepcfg->state_flags[idx_core] = MISSING;
+                       keepcfg->last_alive[idx_core] = rte_rdtsc();
+                       break;
+               case MISSING: /* MIA */
+                       print_trace("Core MIA. ", keepcfg, idx_core);
+                       keepcfg->state_flags[idx_core] = DEAD;
+                       break;
+               case DEAD: /* Dead */
+                       /* Move to GONE first so the callback fires once. */
+                       keepcfg->state_flags[idx_core] = GONE;
+                       print_trace("Core died. ", keepcfg, idx_core);
+                       if (keepcfg->callback)
+                               keepcfg->callback(
+                                       keepcfg->callback_data,
+                                       idx_core
+                                       );
+                       break;
+               case GONE: /* Buried */
+                       break;
+               default: /* Unknown value: leave the slot untouched. */
+                       break;
+               }
+       }
+}
+
+
+/*
+ * Allocates and initialises a keepalive structure.
+ *
+ * callback (may be NULL) and data are stored for use by
+ * rte_keepalive_dispatch_pings(). Returns the new structure, or NULL if
+ * the allocation fails.
+ */
+struct rte_keepalive *
+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
+       void *data)
+{
+       struct rte_keepalive *keepcfg;
+
+       /*
+        * Zero-filled allocation: every state flag starts as MISSING (0),
+        * no core is active, and last_alive[] holds a defined value (0)
+        * so the debug trace never reads uninitialised memory.
+        */
+       keepcfg = calloc(1, sizeof(*keepcfg));
+       if (keepcfg == NULL)
+               return NULL;
+
+       keepcfg->callback = callback;
+       keepcfg->callback_data = data;
+       keepcfg->tsc_initial = rte_rdtsc();
+       keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
+       return keepcfg;
+}
+
+
+/*
+ * Marks core id_core as one to be checked by
+ * rte_keepalive_dispatch_pings(). Ids outside the range
+ * [0, RTE_KEEPALIVE_MAXCORES) are ignored.
+ */
+void
+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
+{
+       /* id_core is signed: reject negatives as well as too-large ids. */
+       if (id_core >= 0 && id_core < RTE_KEEPALIVE_MAXCORES)
+               keepcfg->active_cores[id_core] = 1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index d62196e..05a44d7 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c

 CFLAGS_eal.o := -D_GNU_SOURCE
 CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
-- 
1.9.3

Reply via email to