Signed-off-by: Angus Salkeld <asalk...@redhat.com>
---
 configure.ac                |   36 ++
 corosync.spec.in            |   14 +
 exec/Makefile.am            |    2 +-
 exec/service.c              |   12 +
 include/corosync/corodefs.h |    4 +-
 services/Makefile.am        |    6 +
 services/mon.c              |  635 ++++++++++++++++++++++++++++++++++++
 services/wd.c               |  755 +++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 1462 insertions(+), 2 deletions(-)
 create mode 100644 services/mon.c
 create mode 100644 services/wd.c

diff --git a/configure.ac b/configure.ac
index b57fdd2..ad4b6c1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -252,6 +252,16 @@ AC_ARG_ENABLE([rdma],
        [ enable_rdma="no" ])
 AM_CONDITIONAL(BUILD_RDMA, test x$enable_rdma = xyes)
 
+AC_ARG_ENABLE([monitoring],
+       [  --enable-monitoring              : resource monitoring ],,
+       [ enable_monitoring="no" ])
+AM_CONDITIONAL(BUILD_MONITORING, test x$enable_monitoring = xyes)
+
+AC_ARG_ENABLE([watchdog],
+       [  --enable-watchdog                   : Watchdog support ],,
+       [ enable_watchdog="no" ])
+AM_CONDITIONAL(BUILD_WATCHDOG, test x$enable_watchdog = xyes)
+
 AC_ARG_ENABLE([augeas],
        [  --enable-augeas                 : Install the augeas lens for 
corosync.conf ],,
        [ enable_augeas="no" ])
@@ -277,6 +287,7 @@ AC_ARG_WITH([socket-dir],
 # THIS SECTION MUST DIE!
 CP=cp
 OS_LDL="-ldl"
+have_linux="no"
 case "$host_os" in
        *linux*)
                AC_DEFINE_UNQUOTED([COROSYNC_LINUX], [1],
@@ -286,6 +297,7 @@ case "$host_os" in
                OS_LDFLAGS=""
                OS_DYFLAGS="-rdynamic"
                DARWIN_OPTS=""
+               have_linux="yes"
        ;;
        darwin*)
                AC_DEFINE_UNQUOTED([COROSYNC_DARWIN], [1],
@@ -387,6 +399,30 @@ if test "x${enable_rdma}" = xyes; then
        PACKAGE_FEATURES="$PACKAGE_FEATURES rdma"
 fi
 
+dnl Resource monitoring: prefer libstatgrab when present; without it only
+dnl Linux hosts are accepted.
+if test "x${enable_monitoring}" = xyes; then
+
+       AC_CHECK_LIB([statgrab], [sg_get_mem_stats],
+               [have_libstatgrab="yes"], [have_libstatgrab="no"])
+
+       if test "x${have_libstatgrab}" = xyes; then
+               AC_DEFINE_UNQUOTED([HAVE_LIBSTATGRAB], 1, [have libstatgrab])
+               statgrab_LIBS="-lstatgrab"
+       else
+               if test "x${have_linux}" = xno; then
+                       AC_MSG_ERROR([monitoring requires libstatgrab on non-linux systems])
+               fi
+       fi
+       AC_SUBST([statgrab_LIBS])
+       AC_DEFINE_UNQUOTED([HAVE_MONITORING], 1, [have resource monitoring])
+       PACKAGE_FEATURES="$PACKAGE_FEATURES monitoring"
+fi
+
+dnl Watchdog support needs the Linux watchdog and reboot headers.
+if test "x${enable_watchdog}" = xyes; then
+       AC_CHECK_HEADER([linux/watchdog.h],,
+               [AC_MSG_ERROR([watchdog requires linux/watchdog.h])])
+       AC_CHECK_HEADER([linux/reboot.h],,
+               [AC_MSG_ERROR([watchdog requires linux/reboot.h])])
+       AC_DEFINE_UNQUOTED([HAVE_WATCHDOG], 1, [have watchdog])
+       PACKAGE_FEATURES="$PACKAGE_FEATURES watchdog"
+fi
+
 if test "x${enable_augeas}" = xyes; then
        PACKAGE_FEATURES="$PACKAGE_FEATURES augeas"
 fi
diff --git a/corosync.spec.in b/corosync.spec.in
index dafdb3c..ed531c3 100644
--- a/corosync.spec.in
+++ b/corosync.spec.in
@@ -5,6 +5,8 @@
 # Invoke "rpmbuild --without <feature>" or "rpmbuild --with <feature>"
 # to disable or enable specific features
 %bcond_with testagents
+%bcond_with watchdog
+%bcond_with monitoring
 
 Name: corosync
 Summary: The Corosync Cluster Engine and Application Programming Interfaces
@@ -52,6 +54,12 @@ export rdmacm_LIBS=-lrdmacm \
 %if %{with testagents}
        --enable-testagents \
 %endif
+%if %{with watchdog}
+       --enable-watchdog \
+%endif
+%if %{with monitoring}
+       --enable-monitoring \
+%endif
        --enable-rdma \
        --with-initddir=%{_initrddir}
 
@@ -115,6 +123,12 @@ fi
 %{_libexecdir}/lcrso/quorum_testquorum.lcrso
 %{_libexecdir}/lcrso/vsf_quorum.lcrso
 %{_libexecdir}/lcrso/vsf_ykd.lcrso
+%if %{with watchdog}
+%{_libexecdir}/lcrso/service_wd.lcrso
+%endif
+%if %{with monitoring}
+%{_libexecdir}/lcrso/service_mon.lcrso
+%endif
 %dir %{_localstatedir}/lib/corosync
 %dir %{_localstatedir}/log/cluster
 %{_mandir}/man8/corosync_overview.8*
diff --git a/exec/Makefile.am b/exec/Makefile.am
index a3a49bf..938237c 100644
--- a/exec/Makefile.am
+++ b/exec/Makefile.am
@@ -59,7 +59,7 @@ libcoroipcs_a_SOURCES = $(COROIPCS_SRC)
 corosync_SOURCES       = main.c util.c sync.c apidef.c service.c \
                          timer.c totemconfig.c mainconfig.c quorum.c 
schedwrk.c \
                          ../lcr/lcr_ifact.c evil.c syncv2.c
-corosync_LDADD         = -ltotem_pg -llogsys -lcoroipcs
+corosync_LDADD         = -ltotem_pg -llogsys -lcoroipcs $(statgrab_LIBS)
 corosync_DEPENDENCIES  = libtotem_pg.so.$(SONAME) liblogsys.so.$(SONAME) 
libcoroipcs.so.$(SONAME)
 corosync_LDFLAGS       = $(OS_DYFLAGS) -L./
 
diff --git a/exec/service.c b/exec/service.c
index be55459..dc30406 100644
--- a/exec/service.c
+++ b/exec/service.c
@@ -85,6 +85,18 @@ static struct default_service default_services[] = {
                .name                    = "corosync_pload",
                .ver                     = 0,
        },
+#ifdef HAVE_MONITORING
+       {
+               .name                    = "corosync_mon",
+               .ver                     = 0,
+       },
+#endif
+#ifdef HAVE_WATCHDOG
+       {
+               .name                    = "corosync_wd",
+               .ver                     = 0,
+       },
+#endif
        {
                .name                    = "corosync_quorum",
                .ver                     = 0,
diff --git a/include/corosync/corodefs.h b/include/corosync/corodefs.h
index 57923e2..a1e6539 100644
--- a/include/corosync/corodefs.h
+++ b/include/corosync/corodefs.h
@@ -59,7 +59,9 @@ enum corosync_service_types {
        NTF_SERVICE = 16,
        AMF_V2_SERVICE = 17,
        TST_SV1_SERVICE = 18,
-       TST_SV2_SERVICE = 19
+       TST_SV2_SERVICE = 19,
+       MON_SERVICE = 20,
+       WD_SERVICE = 21
 };
 
 #ifdef HAVE_SMALL_MEMORY_FOOTPRINT
diff --git a/services/Makefile.am b/services/Makefile.am
index cb64016..f39adc3 100644
--- a/services/Makefile.am
+++ b/services/Makefile.am
@@ -38,6 +38,12 @@ INCLUDES             = -I$(top_builddir)/include 
-I$(top_srcdir)/include \
                          -I$(top_srcdir)/include/corosync
 
 SERVICE_LCRSO          = evs cfg cpg confdb pload
+if BUILD_WATCHDOG
+SERVICE_LCRSO          += wd
+endif
+if BUILD_MONITORING
+SERVICE_LCRSO          += mon
+endif
 
 QUORUM_LCRSO           = votequorum testquorum
 
diff --git a/services/mon.c b/services/mon.c
new file mode 100644
index 0000000..3e475a1
--- /dev/null
+++ b/services/mon.c
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * All rights reserved.
+ *
+ * Author: Angus Salkeld <asalk...@redhat.com>
+ *
+ * This software licensed under BSD license, the text of which follows:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the MontaVista Software, Inc. nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <unistd.h>
+#if defined(HAVE_LIBSTATGRAB)
+#include <statgrab.h>
+#endif
+
+#include <corosync/corotypes.h>
+#include <corosync/corodefs.h>
+#include <corosync/lcr/lcr_comp.h>
+#include <corosync/engine/coroapi.h>
+#include <corosync/list.h>
+#include <corosync/totem/coropoll.h>
+#include <corosync/engine/logsys.h>
+#include "../exec/fsm.h"
+
+
+LOGSYS_DECLARE_SUBSYS ("MON");
+
+#undef ENTER
+#define ENTER() log_printf (LOGSYS_LEVEL_INFO, "%s", __func__)
+
+/*
+ * Service Interfaces required by service_message_handler struct
+ */
+static int mon_exec_init_fn (
+       struct corosync_api_v1 *corosync_api);
+
+/*
+ * Handle of the poll instance that drives the monitoring timers.  It is
+ * assigned by mon_thread_handler(); mon_config_changed() treats the value
+ * 0 as "not created yet".
+ */
+hdb_handle_t mon_poll = 0;
+/* corosync API table, stored by mon_exec_init_fn() */
+static struct corosync_api_v1 *api;
+/* handle of the top-level "resources" object */
+static hdb_handle_t resources_obj;
+/* thread that runs mon_thread_handler() */
+static pthread_t mon_poll_thread;
+/* initial polling period of every resource (presumably seconds - confirm) */
+#define MON_DEFAULT_PERIOD 3
+
+/*
+ * Service engine description handed to corosync by
+ * mon_get_service_engine_ver0().  Only exec_init_fn is provided: the
+ * service registers no library handlers, no executive message handlers
+ * and no configuration-change callback.
+ */
+struct corosync_service_engine mon_service_engine = {
+       .name                   = "corosync resource monitoring service",
+       .id                     = MON_SERVICE,
+       .priority               = 1,
+       .private_data_size      = 0,
+       .flow_control           = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
+       .lib_init_fn            = NULL,
+       .lib_exit_fn            = NULL,
+       .lib_engine             = NULL,
+       .lib_engine_count       = 0,
+       .exec_engine            = NULL,
+       .exec_engine_count      = 0,
+       .confchg_fn             = NULL,
+       .exec_init_fn           = mon_exec_init_fn,
+       .exec_dump_fn           = NULL,
+       .sync_mode              = CS_SYNC_V2
+};
+
+static DECLARE_LIST_INIT (confchg_notify);
+
+
+/*
+ * State kept for one monitored resource.  Two instances are defined in
+ * this file: memory_used_inst and load_15min_inst.
+ */
+struct resource_instance {
+       /* handle of the resource's object, filled in by mon_instance_init() */
+       hdb_handle_t handle;
+       /* object name of the resource; also used as the FSM name */
+       const char *name;
+       /* handle of the most recently armed poll timer */
+       poll_timer_handle timer_handle;
+       /* sampler; invoked with a pointer to this instance as data */
+       void (*update_stats_fn) (void *data);
+       /* per-resource state machine (disabled / ok / failed) */
+       struct cs_fsm fsm;
+       /* polling period; multiplied by 1000 when a timer is armed */
+       int32_t period;
+       /* selects the active member of max and the type of "current" */
+       objdb_value_types_t max_type;
+       /* threshold; a sample above it raises MON_E_FAILURE */
+       union {
+               int32_t int32;
+               double dbl;
+       } max;
+};
+
+static void mem_update_stats_fn (void *data);
+static void load_update_stats_fn (void *data);
+
+/*
+ * Memory usage resource: sampled by mem_update_stats_fn() and compared
+ * against an int32 threshold that starts at INT32_MAX.
+ */
+static struct resource_instance memory_used_inst = {
+       .name = "memory_used",
+       .update_stats_fn = mem_update_stats_fn,
+       .max_type = OBJDB_VALUETYPE_INT32,
+       .max.int32 = INT32_MAX,
+       .period = MON_DEFAULT_PERIOD,
+};
+
+/*
+ * 15 minute load average resource: sampled by load_update_stats_fn() and
+ * compared against a double threshold that starts at INT32_MAX.
+ */
+static struct resource_instance load_15min_inst = {
+       .name = "load_15min",
+       .update_stats_fn = load_update_stats_fn,
+       .max_type = OBJDB_VALUETYPE_DOUBLE,
+       .max.dbl = INT32_MAX,
+       .period = MON_DEFAULT_PERIOD,
+};
+
+
+/*
+ * F S M
+ */
+/* FSM actions, defined further down in this file */
+static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
+static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
+
+/* printable names returned by mon_res_state_to_str()/mon_res_event_to_str() */
+const char * mon_ok_str = "ok";
+const char * mon_failed_str = "failed";
+const char * mon_failure_str = "failure";
+const char * mon_disabled_str = "disabled";
+const char * mon_config_changed_str = "config_changed";
+
+/* states of a monitored resource */
+enum mon_resource_state {
+       MON_S_DISABLED,
+       MON_S_OK,
+       MON_S_FAILED
+};
+/* events fed into a resource's state machine */
+enum mon_resource_event {
+       MON_E_CONFIG_CHANGED,
+       MON_E_FAILURE
+};
+
+/*
+ * One row per (state, event) pair: the action to run and a -1 terminated
+ * list that presumably names the states the action may move to (layout
+ * is defined by struct cs_fsm_entry in ../exec/fsm.h - confirm there).
+ */
+struct cs_fsm_entry mon_fsm_table[] = {
+       { MON_S_DISABLED,       MON_E_CONFIG_CHANGED,   mon_config_changed,
+               {MON_S_DISABLED, MON_S_OK, -1} },
+       { MON_S_DISABLED,       MON_E_FAILURE,          NULL,
+               {-1} },
+       { MON_S_OK,             MON_E_CONFIG_CHANGED,   mon_config_changed,
+               {MON_S_OK, MON_S_DISABLED, -1} },
+       { MON_S_OK,             MON_E_FAILURE,          mon_resource_failed,
+               {MON_S_FAILED, -1} },
+       { MON_S_FAILED,         MON_E_CONFIG_CHANGED,   mon_config_changed,
+               {MON_S_OK, MON_S_DISABLED, -1} },
+       { MON_S_FAILED,         MON_E_FAILURE,          NULL,
+               {-1} },
+};
+
+/*
+ * Dynamic loading descriptor
+ */
+
+static struct corosync_service_engine *mon_get_service_engine_ver0 (void);
+
+/* interface object attached to corosync_mon_ver0[0] at registration time */
+static struct corosync_service_engine_iface_ver0 mon_service_engine_iface = {
+       .corosync_get_service_engine_ver0       = mon_get_service_engine_ver0
+};
+
+/*
+ * The single interface exported by this component: "corosync_mon",
+ * version 0.  .interfaces starts out NULL and is filled in by
+ * corosync_lcr_component_register().
+ */
+static struct lcr_iface corosync_mon_ver0[1] = {
+       {
+               .name                   = "corosync_mon",
+               .version                = 0,
+               .versions_replace       = 0,
+               .versions_replace_count = 0,
+               .dependencies           = 0,
+               .dependency_count       = 0,
+               .constructor            = NULL,
+               .destructor             = NULL,
+               .interfaces             = NULL,
+       }
+};
+
+/* component descriptor passed to lcr_component_register() */
+static struct lcr_comp mon_comp_ver0 = {
+       .iface_count    = 1,
+       .ifaces         = corosync_mon_ver0
+};
+
+/* Return the service engine description of the monitoring service. */
+static struct corosync_service_engine *mon_get_service_engine_ver0 (void)
+{
+       return (&mon_service_engine);
+}
+
+/*
+ * Attach the service-engine interface to the "corosync_mon" descriptor
+ * and register the component.  On Solaris builds this is an ordinary
+ * external function; elsewhere it carries the constructor attribute.
+ */
+#ifdef COROSYNC_SOLARIS
+void corosync_lcr_component_register (void);
+
+void corosync_lcr_component_register (void) {
+#else
+__attribute__ ((constructor)) static void corosync_lcr_component_register (void) {
+#endif
+       lcr_interfaces_set (&corosync_mon_ver0[0], &mon_service_engine_iface);
+
+       lcr_component_register (&mon_comp_ver0);
+}
+
+/*
+ * Translate a mon_resource_state value into its printable name.
+ * Returns NULL for a value that is not one of the three known states.
+ * The fsm argument is not used.
+ */
+static const char * mon_res_state_to_str(struct cs_fsm* fsm,
+       int32_t state)
+{
+       const char *label = NULL;
+
+       if (state == MON_S_DISABLED) {
+               label = mon_disabled_str;
+       } else if (state == MON_S_OK) {
+               label = mon_ok_str;
+       } else if (state == MON_S_FAILED) {
+               label = mon_failed_str;
+       }
+       return label;
+}
+
+/*
+ * Translate a mon_resource_event value into its printable name.
+ * Returns NULL for a value that is not one of the two known events.
+ * The fsm argument is not used.
+ */
+static const char * mon_res_event_to_str(struct cs_fsm* fsm,
+       int32_t event)
+{
+       const char *label = NULL;
+
+       if (event == MON_E_CONFIG_CHANGED) {
+               label = mon_config_changed_str;
+       } else if (event == MON_E_FAILURE) {
+               label = mon_failure_str;
+       }
+       return label;
+}
+
+/*
+ * Ask the FSM to move to next_state and, if the current state differs
+ * afterwards, publish the new state's name in the resource's "state" key.
+ *
+ * fsm        - state machine of the resource
+ * next_state - state requested by the caller
+ * inst       - resource instance; passed on to cs_fsm_state_set() and
+ *              used for the object handle
+ */
+static void mon_fsm_state_set (struct cs_fsm* fsm,
+       enum mon_resource_state next_state, struct resource_instance* inst)
+{
+       enum mon_resource_state prev_state = fsm->curr_state;
+       const char *state_str;
+
+       ENTER();
+
+       cs_fsm_state_set(fsm, next_state, inst);
+
+       if (prev_state == fsm->curr_state) {
+               return;
+       }
+       state_str = mon_res_state_to_str(fsm, fsm->curr_state);
+       if (state_str == NULL) {
+               /* no name for this state: strlen(NULL) would be undefined */
+               log_printf (LOGSYS_LEVEL_ERROR,
+                       "%s: no name for state %d",
+                       inst->name, (int)fsm->curr_state);
+               return;
+       }
+
+       api->object_key_replace (inst->handle,
+               "state", strlen ("state"),
+               state_str, strlen (state_str));
+}
+
+
+/*
+ * FSM action for MON_E_CONFIG_CHANGED.
+ *
+ * Re-reads the resource's "poll_period" and "max" keys:
+ *  - a poll_period in the range 1..119 replaces inst->period, anything
+ *    else leaves the period untouched;
+ *  - without a "max" key the threshold is reset to INT32_MAX and the
+ *    resource becomes MON_S_DISABLED, otherwise the threshold is parsed
+ *    according to inst->max_type and the resource becomes MON_S_OK.
+ * When the poll instance already exists the pending timer is cancelled
+ * and a sample is taken immediately.
+ *
+ * data is the struct resource_instance the event belongs to.
+ *
+ * NOTE(review): the key values are parsed as NUL-terminated strings and
+ * str_len is not consulted - confirm the object database guarantees
+ * termination.
+ */
+static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
+{
+       struct resource_instance * inst = (struct resource_instance *)data;
+       char *str;
+       size_t str_len;
+       objdb_value_types_t type;
+       long tmp_value;
+       int32_t res;
+
+       ENTER();
+
+       res = api->object_key_get_typed (inst->handle,
+                       "poll_period",
+                       (void**)&str, &str_len,
+                       &type);
+       if (res == 0) {
+               /*
+                * Range-check the full strtol() result; narrowing to
+                * int32_t first could wrap a huge value into range.
+                */
+               tmp_value = strtol (str, NULL, 0);
+               if (tmp_value > 0 && tmp_value < 120) {
+                       inst->period = (int32_t)tmp_value;
+               }
+       }
+
+       res = api->object_key_get_typed (inst->handle, "max",
+                       (void**)&str, &str_len, &type);
+       if (res != 0) {
+               if (inst->max_type == OBJDB_VALUETYPE_INT32) {
+                       inst->max.int32 = INT32_MAX;
+               } else
+               if (inst->max_type == OBJDB_VALUETYPE_DOUBLE) {
+                       inst->max.dbl = INT32_MAX;
+               }
+               mon_fsm_state_set (fsm, MON_S_DISABLED, inst);
+       } else {
+               if (inst->max_type == OBJDB_VALUETYPE_INT32) {
+                       inst->max.int32 = strtol (str, NULL, 0);
+               } else
+               if (inst->max_type == OBJDB_VALUETYPE_DOUBLE) {
+                       inst->max.dbl = strtod (str, NULL);
+               }
+               mon_fsm_state_set (fsm, MON_S_OK, inst);
+       }
+
+       if (mon_poll == 0) {
+               return;
+       }
+       poll_timer_delete (mon_poll, inst->timer_handle);
+       /*
+        * Run the updater now, in case the period has shortened.  The
+        * updater re-arms the timer itself with inst as callback data, so
+        * no timer is added here: a second one would fire in parallel, and
+        * arming it with NULL data makes the callback dereference NULL.
+        */
+       inst->update_stats_fn (inst);
+}
+
+/*
+ * FSM action for MON_E_FAILURE: put the resource passed in data into the
+ * MON_S_FAILED state.
+ */
+void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
+{
+       struct resource_instance * failed_inst = data;
+
+       ENTER();
+       mon_fsm_state_set (fsm, MON_S_FAILED, failed_inst);
+}
+
+/*
+ * Return the share of memory plus swap that is in use, as a percentage,
+ * or -1 when the figures cannot be obtained.
+ *
+ * With libstatgrab the numbers come from sg_get_mem_stats() and
+ * sg_get_swap_stats(); otherwise, on Linux, from the MemTotal, MemFree,
+ * SwapTotal and SwapFree lines of /proc/meminfo.
+ */
+static int32_t percent_mem_used_get(void)
+{
+#if defined(HAVE_LIBSTATGRAB)
+       sg_mem_stats *mem_stats;
+       sg_swap_stats *swap_stats;
+       long long total, freemem;
+
+       mem_stats = sg_get_mem_stats();
+       swap_stats = sg_get_swap_stats();
+
+       /* both results are dereferenced below, so both must be valid */
+       if (mem_stats == NULL || swap_stats == NULL) {
+               log_printf (LOGSYS_LEVEL_ERROR,
+                       "Unable to get memory stats: %s\n",
+                       sg_str_error(sg_get_error()));
+               return -1;
+       }
+       total = mem_stats->total + swap_stats->total;
+       freemem = mem_stats->free + swap_stats->free;
+       if (total <= 0) {
+               /* nothing to divide by */
+               return -1;
+       }
+       return ((total - freemem) * 100) / total;
+#else
+#if defined(COROSYNC_LINUX)
+       char *line_ptr;
+       char line[512];
+       unsigned long long value;
+       FILE *f;
+       long long total = 0;
+       long long freemem = 0;
+
+       if ((f = fopen("/proc/meminfo", "r")) == NULL) {
+               return -1;
+       }
+
+       while ((line_ptr = fgets(line, sizeof(line), f)) != NULL) {
+               /* only lines of the form "<name> <number> kB" are of interest */
+               if (sscanf(line_ptr, "%*s %llu kB", &value) != 1) {
+                       continue;
+               }
+               value *= 1024;
+
+               if (strncmp(line_ptr, "MemTotal:", 9) == 0) {
+                       total += value;
+               } else if (strncmp(line_ptr, "MemFree:", 8) == 0) {
+                       freemem += value;
+               } else if (strncmp(line_ptr, "SwapTotal:", 10) == 0) {
+                       total += value;
+               } else if (strncmp(line_ptr, "SwapFree:", 9) == 0) {
+                       freemem += value;
+               }
+       }
+
+       fclose(f);
+       if (total <= 0) {
+               /* no total was parsed: avoid dividing by zero */
+               return -1;
+       }
+       return ((total - freemem) * 100) / total;
+#else
+#error need libstatgrab or linux.
+#endif /* COROSYNC_LINUX */
+#endif /* HAVE_LIBSTATGRAB */
+}
+
+
+/*
+ * Timer callback of the memory resource.
+ *
+ * Samples the memory usage, stores it in the "current" key together with
+ * a "last_updated" timestamp, raises MON_E_FAILURE when the sample is
+ * above inst->max.int32 and finally re-arms the timer for the next
+ * period.  data is the struct resource_instance being sampled.
+ */
+static void mem_update_stats_fn (void *data)
+{
+       struct resource_instance * inst = (struct resource_instance *)data;
+       int32_t new_value;
+       uint64_t timestamp;
+
+       new_value = percent_mem_used_get();
+       /* -1 reports a failed sample; 0 is a valid reading */
+       if (new_value >= 0) {
+               api->object_key_replace (inst->handle,
+                       "current", strlen("current"),
+                       &new_value, sizeof(new_value));
+
+               timestamp = time (NULL);
+
+               /* size taken from the variable, not time_t, which may be narrower */
+               api->object_key_replace (inst->handle,
+                       "last_updated", strlen("last_updated"),
+                       &timestamp, sizeof(timestamp));
+
+               if (new_value > inst->max.int32) {
+                       cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst);
+               }
+       }
+       poll_timer_add (mon_poll,
+               inst->period * 1000, inst,
+               inst->update_stats_fn,
+               &inst->timer_handle);
+}
+
+/*
+ * Return the 15 minute load average, or -1 when it cannot be obtained.
+ */
+static double min15_loadavg_get(void)
+{
+#if defined(HAVE_LIBSTATGRAB)
+       sg_load_stats *load_stats;
+       load_stats = sg_get_load_stats ();
+       if (load_stats == NULL) {
+               log_printf (LOGSYS_LEVEL_ERROR,
+                       "Unable to get load stats: %s\n",
+                       sg_str_error (sg_get_error()));
+               return -1;
+       }
+       return load_stats->min15;
+#else
+#if defined(COROSYNC_LINUX)
+       double loadav[3];
+       /*
+        * getloadavg() returns the number of samples it stored; the third
+        * one is only valid when all three were delivered.
+        */
+       if (getloadavg(loadav,3) < 3) {
+               return -1;
+       }
+       return loadav[2];
+#else
+#error need libstatgrab or linux.
+#endif /* COROSYNC_LINUX */
+#endif /* HAVE_LIBSTATGRAB */
+}
+
+/*
+ * Timer callback of the load average resource.
+ *
+ * Samples the 15 minute load average, stores it in the "current" key
+ * together with a "last_updated" timestamp, raises MON_E_FAILURE when
+ * the sample is above inst->max.dbl and finally re-arms the timer for
+ * the next period.  A failed sample (negative value) is not published.
+ * data is the struct resource_instance being sampled.
+ */
+static void load_update_stats_fn (void *data)
+{
+       struct resource_instance * inst = (struct resource_instance *)data;
+       uint64_t timestamp;
+       int32_t res = 0;
+       double min15 = min15_loadavg_get();
+
+       if (min15 >= 0) {
+               res = api->object_key_replace (inst->handle,
+                       "current", strlen("current"),
+                       &min15, sizeof (min15));
+               if (res != 0) {
+                       log_printf (LOGSYS_LEVEL_ERROR,
+                               "replace current failed: %d", res);
+               }
+
+               timestamp = cs_timestamp_get();
+
+               res = api->object_key_replace (inst->handle,
+                       "last_updated", strlen("last_updated"),
+                       &timestamp, sizeof(uint64_t));
+               if (res != 0) {
+                       log_printf (LOGSYS_LEVEL_ERROR,
+                               "replace last_updated failed: %d", res);
+               }
+
+               if (min15 > inst->max.dbl) {
+                       /*
+                        * The FSM actions cast their data argument to a
+                        * struct resource_instance *, so pass the instance
+                        * itself and not the address of the local pointer.
+                        */
+                       cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst);
+               }
+       }
+
+       poll_timer_add (mon_poll,
+               inst->period * 1000, inst,
+               inst->update_stats_fn,
+               &inst->timer_handle);
+}
+
+/*
+ * Body of the monitoring thread: initialise libstatgrab when it is in
+ * use, create the poll instance, arm the first timer of the memory and
+ * load resources (in that order) and run the poll loop.  The argument is
+ * ignored and NULL is returned once poll_run() comes back.
+ */
+static void *mon_thread_handler (void * unused)
+{
+       struct resource_instance *monitored[] = {
+               &memory_used_inst,
+               &load_15min_inst
+       };
+       size_t idx;
+
+#ifdef HAVE_LIBSTATGRAB
+       sg_init();
+#endif /* HAVE_LIBSTATGRAB */
+       mon_poll = poll_create ();
+
+       for (idx = 0; idx < sizeof (monitored) / sizeof (monitored[0]); idx++) {
+               struct resource_instance *res_inst = monitored[idx];
+
+               poll_timer_add (mon_poll,
+                       res_inst->period * 1000,
+                       res_inst,
+                       res_inst->update_stats_fn,
+                       &res_inst->timer_handle);
+       }
+       poll_run (mon_poll);
+
+       return NULL;
+}
+
+/*
+ * Look up the child object called object_name below
+ * parent_object_handle and create it when the search finds nothing.
+ *
+ * The handle of the found or created object is stored in *object_handle.
+ * Returns 0 when the object already existed, otherwise whatever
+ * api->object_create() returned.
+ */
+static int object_find_or_create (
+       hdb_handle_t parent_object_handle,
+       hdb_handle_t *object_handle,
+       const void *object_name,
+       size_t object_name_len)
+{
+       hdb_handle_t search;
+       hdb_handle_t existing;
+       int rc;
+
+       api->object_find_create (parent_object_handle,
+               object_name, object_name_len, &search);
+
+       if (api->object_find_next (search, &existing) != 0) {
+               /* nothing there yet */
+               rc = api->object_create (parent_object_handle,
+                       object_handle,
+                       object_name, object_name_len);
+       } else {
+               *object_handle = existing;
+               rc = 0;
+       }
+
+       api->object_find_destroy (search);
+       return rc;
+}
+
+/*
+ * Object database notification for a resource object: a change to the
+ * "max" or "poll_period" key feeds MON_E_CONFIG_CHANGED into the state
+ * machine of the resource.  Changes to other keys are ignored.
+ *
+ * priv_data_pt is expected to be the struct resource_instance that was
+ * supplied when tracking was started; a notification without it is
+ * dropped because there is no instance to act on.
+ *
+ * NOTE(review): the key name is compared as a NUL-terminated string and
+ * key_len is not consulted - confirm the object database guarantees
+ * termination.
+ */
+static void mon_key_change_notify (object_change_type_t change_type,
+       hdb_handle_t parent_object_handle,
+       hdb_handle_t object_handle,
+       const void *object_name_pt, size_t object_name_len,
+       const void *key_name_pt, size_t key_len,
+       const void *key_value_pt, size_t key_value_len,
+       void *priv_data_pt)
+{
+       struct resource_instance* inst = (struct resource_instance*)priv_data_pt;
+       const char *key_name = (const char *)key_name_pt;
+
+       if (inst == NULL || key_name == NULL) {
+               return;
+       }
+
+       if ((strcmp (key_name, "max") == 0) ||
+               (strcmp (key_name, "poll_period") == 0)) {
+               ENTER();
+               cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
+       }
+}
+
+/*
+ * Set up one monitored resource below parent:
+ *  - find or create the resource's object and its "current",
+ *    "last_updated" and "state" keys;
+ *  - initialise the resource's state machine in MON_S_DISABLED;
+ *  - take the polling period from an existing "poll_period" key (values
+ *    1..119 only) or create that key from inst->period;
+ *  - process MON_E_CONFIG_CHANGED once so the threshold is read;
+ *  - start tracking key changes with inst as private data.
+ *
+ * No timer is armed here: the poll instance does not exist until
+ * mon_thread_handler() creates it, and that function arms the first
+ * timer of every resource.
+ */
+static void mon_instance_init (hdb_handle_t parent, struct resource_instance* inst)
+{
+       int32_t res;
+       char *period_cfg;
+       char mon_period_str[32];
+       size_t mon_period_len;
+       objdb_value_types_t mon_period_type;
+       long tmp_value;
+       int32_t zero_32 = 0;
+       uint64_t zero_64 = 0;
+       double zero_double = 0;
+
+       ENTER();
+
+       res = object_find_or_create (parent,
+               &inst->handle,
+               inst->name, strlen (inst->name));
+       if (res != 0) {
+               /* reported only; the calls below are still attempted */
+               log_printf (LOGSYS_LEVEL_ERROR,
+                       "unable to find or create object %s: %d",
+                       inst->name, res);
+       }
+
+       if (inst->max_type == OBJDB_VALUETYPE_INT32) {
+               api->object_key_create_typed (inst->handle,
+                       "current", &zero_32,
+                       sizeof (zero_32), inst->max_type);
+       } else {
+               api->object_key_create_typed (inst->handle,
+                       "current", &zero_double,
+                       sizeof (zero_double), inst->max_type);
+       }
+
+       /* an INT64 key needs 64 bits of storage; time_t may be narrower */
+       api->object_key_create_typed (inst->handle,
+               "last_updated", &zero_64,
+               sizeof (zero_64), OBJDB_VALUETYPE_INT64);
+
+       api->object_key_create_typed (inst->handle,
+               "state", mon_disabled_str, strlen (mon_disabled_str),
+               OBJDB_VALUETYPE_STRING);
+
+       inst->fsm.name = inst->name;
+       inst->fsm.curr_entry = 0;
+       inst->fsm.curr_state = MON_S_DISABLED;
+       inst->fsm.table = mon_fsm_table;
+       inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry);
+       inst->fsm.state_to_str = mon_res_state_to_str;
+       inst->fsm.event_to_str = mon_res_event_to_str;
+
+       /*
+        * object_key_get_typed() hands back a pointer to the stored value,
+        * so it needs a pointer variable; passing the address of the
+        * formatting buffer would overwrite the buffer's first bytes.
+        */
+       res = api->object_key_get_typed (inst->handle,
+                       "poll_period",
+                       (void**)&period_cfg, &mon_period_len,
+                       &mon_period_type);
+       if (res != 0) {
+               mon_period_len = snprintf (mon_period_str,
+                       sizeof (mon_period_str), "%d",
+                       inst->period);
+               api->object_key_create_typed (inst->handle,
+                       "poll_period", mon_period_str,
+                       mon_period_len,
+                       OBJDB_VALUETYPE_STRING);
+       }
+       else {
+               tmp_value = strtol (period_cfg, NULL, 0);
+               if (tmp_value > 0 && tmp_value < 120) {
+                       inst->period = (int32_t)tmp_value;
+               }
+       }
+       cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
+
+       /* the last argument is handed to mon_key_change_notify() as priv_data_pt */
+       api->object_track_start (inst->handle, OBJECT_TRACK_DEPTH_ONE,
+               mon_key_change_notify,
+               NULL, NULL, NULL, inst);
+
+}
+
+/*
+ * exec_init_fn of the monitoring service.
+ *
+ * Stores the corosync API table, finds or creates the "resources" object
+ * and its "system" child, initialises the memory and load resources
+ * below "system" and starts the monitoring thread.
+ *
+ * Returns 0 on success and -1 when an object or the thread could not be
+ * created.
+ * NOTE(review): the original always returned 0; confirm how the service
+ * loader reacts to a non-zero result.
+ */
+static int mon_exec_init_fn (
+       struct corosync_api_v1 *corosync_api)
+{
+       hdb_handle_t parent;
+       int res;
+
+#ifdef COROSYNC_SOLARIS
+       logsys_subsys_init();
+#endif
+       api = corosync_api;
+       ENTER();
+
+       res = object_find_or_create (OBJECT_PARENT_HANDLE,
+               &resources_obj,
+               "resources", strlen ("resources"));
+       if (res != 0) {
+               log_printf (LOGSYS_LEVEL_ERROR,
+                       "unable to find or create object resources: %d", res);
+               return -1;
+       }
+
+       res = object_find_or_create (resources_obj,
+               &parent,
+               "system", strlen ("system"));
+       if (res != 0) {
+               log_printf (LOGSYS_LEVEL_ERROR,
+                       "unable to find or create object system: %d", res);
+               return -1;
+       }
+
+       mon_instance_init (parent, &memory_used_inst);
+       mon_instance_init (parent, &load_15min_inst);
+
+       /* pthread_create() reports failure through its return value */
+       res = pthread_create (&mon_poll_thread, NULL, mon_thread_handler, NULL);
+       if (res != 0) {
+               log_printf (LOGSYS_LEVEL_ERROR,
+                       "unable to start monitoring thread: %d", res);
+               return -1;
+       }
+
+       return 0;
+}
+
+
diff --git a/services/wd.c b/services/wd.c
new file mode 100644
index 0000000..9c9ad97
--- /dev/null
+++ b/services/wd.c
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * All rights reserved.
+ *
+ * Author: Angus Salkeld <asalk...@redhat.com>
+ *
+ * This software licensed under BSD license, the text of which follows:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the MontaVista Software, Inc. nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+#include <linux/reboot.h>
+
+#include <corosync/corotypes.h>
+#include <corosync/corodefs.h>
+#include <corosync/lcr/lcr_comp.h>
+#include <corosync/engine/coroapi.h>
+#include <corosync/list.h>
+#include <corosync/engine/logsys.h>
+#include "../exec/fsm.h"
+
+
+/*
+ * Health classification of a resource.
+ * NOTE(review): nothing else in wd.c refers to this type or its
+ * constants; the state machine uses enum wd_resource_state instead.
+ */
+typedef enum {
+       WD_RESOURCE_GOOD,
+       WD_RESOURCE_FAILED,
+       WD_RESOURCE_STATE_UNKNOWN,
+       WD_RESOURCE_NOT_MONITORED
+} wd_resource_state_t;
+
+/*
+ * Book-keeping for one watched object below resources/<type>/.
+ * Instances are allocated in wd_resource_create.
+ */
+struct resource {
+       /* objdb handle of the resource object */
+       hdb_handle_t handle;
+       /* value of the "recovery" key as handed back by object_key_get_typed;
+        * compared against "watchdog", "quit", "reboot" and "shutdown" */
+       char *recovery;
+       /* object name, NUL terminated by wd_resource_create */
+       char name[128];
+       /* filled from the "last_updated" key when the resource is created */
+       time_t last_updated;
+       /* per-resource state machine driven by wd_fsm_table */
+       struct cs_fsm fsm;
+
+       /* handle of the pending wd_resource_check_fn timer */
+       corosync_timer_handle_t check_timer;
+       /* seconds between checks, also the maximum age of "last_updated" */
+       uint32_t check_timeout;
+};
+
+LOGSYS_DECLARE_SUBSYS("WD");
+
+/*
+ * Service Interfaces required by service_message_handler struct
+ */
+static int wd_exec_init_fn (
+       struct corosync_api_v1 *corosync_api);
+static int wd_exec_exit_fn (void);
+static void wd_resource_check_fn (void* resource_ref);
+
+/* corosync API table, stored by wd_exec_init_fn */
+static struct corosync_api_v1 *api;
+/* timeout in seconds used when no usable "watchdog_timeout" key exists */
+#define WD_DEFAULT_TIMEOUT 6
+/* current watchdog timeout in seconds, see watchdog_timeout_apply */
+static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT;
+/* seconds between keep-alives, kept at half of watchdog_timeout */
+static uint32_t tickle_timeout = (WD_DEFAULT_TIMEOUT / 2);
+/* file descriptor of /dev/watchdog, -1 while the device is not open */
+static int dog = -1;
+/* handle of the pending wd_tickle_fn timer */
+static corosync_timer_handle_t wd_timer;
+/* objdb handle of the top level "resources" object */
+static hdb_handle_t resources_obj;
+/* cleared by wd_resource_failed; wd_tickle_fn stops feeding the dog then */
+static int watchdog_ok = 1;
+
+/*
+ * Service engine descriptor returned by wd_get_service_engine_ver0.
+ * Only the exec init and exit hooks are set; the library and exec
+ * message tables are empty and no confchg or dump handler is installed.
+ */
+struct corosync_service_engine wd_service_engine = {
+       .name                   = "corosync self-fencing service",
+       .id                     = WD_SERVICE,
+       .priority               = 1,
+       .private_data_size      = 0,
+       .flow_control           = CS_LIB_FLOW_CONTROL_REQUIRED,
+       .lib_init_fn            = NULL,
+       .lib_exit_fn            = NULL,
+       .lib_engine             = NULL,
+       .lib_engine_count       = 0,
+       .exec_engine            = NULL,
+       .exec_engine_count      = 0,
+       .confchg_fn             = NULL,
+       .exec_init_fn           = wd_exec_init_fn,
+       .exec_exit_fn           = wd_exec_exit_fn,
+       .exec_dump_fn           = NULL,
+       .sync_mode              = CS_SYNC_V2
+};
+
+static DECLARE_LIST_INIT (confchg_notify);
+
+/*
+ * F S M
+ */
+/* handlers referenced by wd_fsm_table */
+static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
+static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
+
+/* states of a watched resource */
+enum wd_resource_state {
+       /* "recovery" and "state" keys present, check timer armed */
+       WD_S_GOOD,
+       /* wd_resource_failed has run for this resource */
+       WD_S_FAILED,
+       /* initial state, also entered when a required key is missing */
+       WD_S_DISABLED
+};
+
+/* events fed to the state machine */
+enum wd_resource_event {
+       /* raised by wd_resource_check_fn */
+       WD_E_FAILURE,
+       /* raised by wd_key_changed */
+       WD_E_CONFIG_CHANGED
+};
+
+/* names returned by wd_res_state_to_str and wd_res_event_to_str */
+const char * wd_ok_str                 = "ok";
+const char * wd_failed_str             = "failed";
+const char * wd_failure_str            = "failure";
+const char * wd_disabled_str           = "disabled";
+const char * wd_config_changed_str     = "config_changed";
+
+/*
+ * Transition table: current state, event, handler, then a list of states
+ * ended by -1.
+ * NOTE(review): the list is presumably the set of states the handler is
+ * allowed to move to - confirm against exec/fsm.h.
+ */
+struct cs_fsm_entry wd_fsm_table[] = {
+       { WD_S_DISABLED,        WD_E_CONFIG_CHANGED,    wd_config_changed,      {WD_S_DISABLED, WD_S_GOOD, -1} },
+       { WD_S_DISABLED,        WD_E_FAILURE,           NULL,                   {-1} },
+       { WD_S_GOOD,            WD_E_CONFIG_CHANGED,    wd_config_changed,      {WD_S_GOOD, WD_S_DISABLED, -1} },
+       { WD_S_GOOD,            WD_E_FAILURE,           wd_resource_failed,     {WD_S_FAILED, -1} },
+       { WD_S_FAILED,          WD_E_CONFIG_CHANGED,    wd_config_changed,      {WD_S_GOOD, WD_S_DISABLED, -1} },
+       { WD_S_FAILED,          WD_E_FAILURE,           NULL,                   {-1} },
+};
+
+/*
+ * Dynamic loading descriptor
+ */
+
+static struct corosync_service_engine *wd_get_service_engine_ver0 (void);
+
+/* interface object attached to corosync_wd_ver0[0] by
+ * corosync_lcr_component_register */
+static struct corosync_service_engine_iface_ver0 wd_service_engine_iface = {
+       .corosync_get_service_engine_ver0       = wd_get_service_engine_ver0
+};
+
+/* the single interface this component exports; .interfaces starts out
+ * NULL and is set through lcr_interfaces_set at registration time */
+static struct lcr_iface corosync_wd_ver0[1] = {
+       {
+               .name                   = "corosync_wd",
+               .version                = 0,
+               .versions_replace       = 0,
+               .versions_replace_count = 0,
+               .dependencies           = 0,
+               .dependency_count       = 0,
+               .constructor            = NULL,
+               .destructor             = NULL,
+               .interfaces             = NULL,
+       }
+};
+
+/* component record passed to lcr_component_register */
+static struct lcr_comp wd_comp_ver0 = {
+       .iface_count    = 1,
+       .ifaces         = corosync_wd_ver0
+};
+
+/* Hands out the address of this module's service engine descriptor. */
+static struct corosync_service_engine *wd_get_service_engine_ver0 (void)
+{
+       struct corosync_service_engine *engine = &wd_service_engine;
+
+       return engine;
+}
+
+/*
+ * Registers the watchdog component with the component loader: attaches
+ * wd_service_engine_iface to the exported interface and registers
+ * wd_comp_ver0. Outside Solaris builds the function is marked as a
+ * constructor.
+ */
+#ifdef COROSYNC_SOLARIS
+void corosync_lcr_component_register (void);
+
+void corosync_lcr_component_register (void) {
+#else
+__attribute__ ((constructor)) static void corosync_lcr_component_register (void) {
+#endif
+       lcr_interfaces_set (&corosync_wd_ver0[0], &wd_service_engine_iface);
+
+       lcr_component_register (&wd_comp_ver0);
+}
+
+/*
+ * Look up the first child of parent_object_handle called object_name and
+ * create it when no such child exists.
+ *
+ * On success *object_handle refers to the found or newly created object.
+ * Returns 0 when the object was found, the result of object_create when
+ * it had to be created, or the non-zero result of object_find_create
+ * when the search could not be set up.
+ */
+static int object_find_or_create (
+       hdb_handle_t parent_object_handle,
+       hdb_handle_t *object_handle,
+       const void *object_name,
+       size_t object_name_len)
+{
+       hdb_handle_t finder;
+       hdb_handle_t found;
+       int ret;
+
+       ret = api->object_find_create (
+               parent_object_handle,
+               object_name,
+               object_name_len,
+               &finder);
+       if (ret != 0) {
+               /* finder was never set up: nothing to search, nothing to destroy */
+               return ret;
+       }
+
+       if (api->object_find_next (finder, &found) == 0) {
+               /* found it, ret is still 0 */
+               *object_handle = found;
+       }
+       else {
+               ret = api->object_create (parent_object_handle,
+                       object_handle,
+                       object_name, object_name_len);
+       }
+
+       api->object_find_destroy (finder);
+       return ret;
+}
+
+/*
+ * Maps a wd_resource_state value to its printable name.
+ * Returns NULL for a value that is not one of the three states.
+ */
+static const char * wd_res_state_to_str(struct cs_fsm* fsm,
+       int32_t state)
+{
+       const char *label = NULL;
+
+       if (state == WD_S_DISABLED) {
+               label = wd_disabled_str;
+       }
+       else if (state == WD_S_GOOD) {
+               label = wd_ok_str;
+       }
+       else if (state == WD_S_FAILED) {
+               label = wd_failed_str;
+       }
+       return label;
+}
+
+/*
+ * Maps a wd_resource_event value to its printable name.
+ * Returns NULL for a value that is not one of the two events.
+ */
+static const char * wd_res_event_to_str(struct cs_fsm* fsm,
+       int32_t event)
+{
+       const char *label = NULL;
+
+       if (event == WD_E_CONFIG_CHANGED) {
+               label = wd_config_changed_str;
+       }
+       else if (event == WD_E_FAILURE) {
+               label = wd_failure_str;
+       }
+       return label;
+}
+
+/*
+ * Decide whether a watched resource has to be treated as failed.
+ *
+ * The resource counts as failed when
+ *  - its "last_updated" or "state" key cannot be read,
+ *  - its "state" value starts with "disabled",
+ *  - "last_updated" is more than check_timeout seconds in the past, or
+ *  - its "state" value is "bad".
+ *
+ * returns (0 == OK, 1 == failed)
+ */
+static int32_t wd_resource_has_failed (struct resource *ref)
+{
+       hdb_handle_t resource = ref->handle;
+       char *state;
+       size_t state_len;
+       time_t *last_updated;
+       size_t last_updated_len;
+       objdb_value_types_t type;
+       time_t now;
+
+       if (api->object_key_get_typed (resource,
+               "last_updated", (void**)&last_updated, &last_updated_len, &type) != 0) {
+               /* key does not exist.
+                */
+               return 1;
+       }
+       if (api->object_key_get_typed (resource,
+               "state", (void**)&state, &state_len, &type) != 0) {
+               /* key does not exist.
+                */
+               return 1;
+       }
+       if (strncmp (state, "disabled", strlen ("disabled")) == 0) {
+               return 1;
+       }
+
+       now = time (NULL);
+
+       if ((*last_updated + ref->check_timeout) < now) {
+               /* time_t has no printf conversion of its own, so cast it */
+               log_printf (LOGSYS_LEVEL_INFO, "delayed %ld + %d < %ld",
+                       (long)*last_updated, (int)ref->check_timeout, (long)now);
+               return 1;
+       }
+
+       /* the stamp is fresh, so only the reported state is left to check */
+       return (strcmp (state, "bad") == 0) ? 1 : 0;
+}
+
+/*
+ * FSM handler for WD_E_CONFIG_CHANGED.
+ *
+ * Re-reads "poll_period" (check_timeout becomes 5/4 of it when it lies
+ * in 1..119), then requires the "recovery" and "state" keys. When either
+ * is missing the resource is moved to WD_S_DISABLED; otherwise it is
+ * moved to WD_S_GOOD and the check timer is restarted.
+ *
+ * data is the struct resource the event belongs to.
+ */
+static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
+{
+       int res;
+       size_t len;
+       char *state;
+       char *period_value;
+       char period_buf[32];
+       long period;
+       objdb_value_types_t type;
+       struct resource *ref = (struct resource*)data;
+
+       /* object_key_get_typed hands back a pointer to the stored value,
+        * so the out argument has to be a pointer, not a char array */
+       res = api->object_key_get_typed (ref->handle,
+                       "poll_period",
+                       (void**)&period_value, &len,
+                       &type);
+       if (res == 0) {
+               /* wd_resource_create stores this value without its NUL,
+                * so terminate a bounded copy before parsing it */
+               if (len >= sizeof (period_buf)) {
+                       len = sizeof (period_buf) - 1;
+               }
+               memcpy (period_buf, period_value, len);
+               period_buf[len] = '\0';
+               period = strtol (period_buf, NULL, 0);
+               if (period > 0 && period < 120) {
+                       ref->check_timeout = (period * 5)/4;
+               }
+       }
+
+       res = api->object_key_get_typed (ref->handle,
+               "recovery", (void**)&ref->recovery, &len, &type);
+       if (res != 0) {
+               /* key does not exist.
+                */
+               log_printf (LOGSYS_LEVEL_WARNING,
+                       "resource %s missing a recovery key.", ref->name);
+               cs_fsm_state_set(&ref->fsm, WD_S_DISABLED, ref);
+               return;
+       }
+       res = api->object_key_get_typed (ref->handle,
+               "state", (void**)&state, &len, &type);
+       if (res != 0) {
+               /* key does not exist.
+                */
+               log_printf (LOGSYS_LEVEL_WARNING,
+                       "resource %s missing a state key.", ref->name);
+               cs_fsm_state_set(&ref->fsm, WD_S_DISABLED, ref);
+               return;
+       }
+
+       cs_fsm_state_set(&ref->fsm, WD_S_GOOD, ref);
+
+       /* restart the check timer with the (possibly new) timeout */
+       if (ref->check_timer) {
+               api->timer_delete(ref->check_timer);
+               ref->check_timer = NULL;
+       }
+       api->timer_add_duration((unsigned long long)ref->check_timeout*1000000000,
+               ref,
+               wd_resource_check_fn, &ref->check_timer);
+
+}
+
+/*
+ * FSM handler for WD_E_FAILURE in state WD_S_GOOD.
+ *
+ * Stops the check timer, logs the failure and carries out the recovery
+ * named by the resource: "watchdog" and "quit" clear watchdog_ok so that
+ * wd_tickle_fn stops feeding the device. "reboot" and "shutdown" are
+ * recognised but take no action yet. The resource ends in WD_S_FAILED.
+ *
+ * data is the struct resource that failed.
+ */
+static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
+{
+       struct resource* ref = (struct resource*)data;
+
+       if (ref->check_timer) {
+               api->timer_delete(ref->check_timer);
+               /* wd_config_changed tests this handle before deleting it
+                * again, so do not leave the deleted one behind */
+               ref->check_timer = NULL;
+       }
+
+       log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!",
+               ref->recovery, ref->name);
+       if (strcmp (ref->recovery, "watchdog") == 0 ||
+           strcmp (ref->recovery, "quit") == 0) {
+               watchdog_ok = 0;
+       }
+       else if (strcmp (ref->recovery, "reboot") == 0) {
+               /* not enabled yet:
+                * reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
+                *        LINUX_REBOOT_CMD_RESTART, NULL);
+                */
+       }
+       else if (strcmp (ref->recovery, "shutdown") == 0) {
+               /* not enabled yet:
+                * reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
+                *        LINUX_REBOOT_CMD_POWER_OFF, NULL);
+                */
+       }
+       cs_fsm_state_set(fsm, WD_S_FAILED, data);
+}
+
+/*
+ * objdb key-change callback registered per resource by
+ * wd_resource_create; priv_data_pt is that resource.
+ *
+ * Feeds WD_E_CONFIG_CHANGED to the resource's state machine for every
+ * key except "last_updated" and "current". wd_config_changed restarts
+ * the check timer, so treating those two as configuration changes would
+ * restart the timer each time they are written.
+ */
+static void wd_key_changed(object_change_type_t change_type,
+       hdb_handle_t parent_object_handle,
+       hdb_handle_t object_handle,
+       const void *object_name_pt, size_t object_name_len,
+       const void *key_name_pt, size_t key_len,
+       const void *key_value_pt, size_t key_value_len,
+       void *priv_data_pt)
+{
+       struct resource* ref = (struct resource*)priv_data_pt;
+
+       if (ref == NULL) {
+               return;
+       }
+       if (strcmp(key_name_pt, "last_updated") == 0 ||
+               strcmp(key_name_pt, "current") == 0) {
+               return;
+       }
+       cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref);
+}
+
+/*
+ * objdb callback for the removal of a watched resource object;
+ * priv_data_pt is the resource registered in wd_resource_create.
+ *
+ * Logs the removal and cancels the resource's check timer.
+ * NOTE(review): the struct resource allocated by wd_resource_create is
+ * not released here - confirm no tracking callback can still fire
+ * before adding a free().
+ */
+static void wd_object_destroyed(
+       hdb_handle_t parent_object_handle,
+       const void *name_pt, size_t name_len,
+       void *priv_data_pt)
+{
+       struct resource* ref = (struct resource*)priv_data_pt;
+
+       /* the name arrives with an explicit length, so bound the print */
+       log_printf (LOGSYS_LEVEL_WARNING,
+                       "watchdog resource \"%.*s\" deleted from objdb!",
+                       (int)name_len, (const char*)name_pt);
+
+       if (ref != NULL && ref->check_timer) {
+               api->timer_delete(ref->check_timer);
+               ref->check_timer = NULL;
+       }
+}
+
+/*
+ * Timer callback that checks one resource; resource_ref is its
+ * struct resource.
+ *
+ * A failed resource gets WD_E_FAILURE and the timer is not re-armed.
+ * A healthy one is checked again after check_timeout seconds.
+ */
+static void wd_resource_check_fn (void* resource_ref)
+{
+       struct resource* ref = (struct resource*)resource_ref;
+
+       log_printf (LOGSYS_LEVEL_INFO,
+                       "checking watchdog resource \"%s\".",
+                       ref->name);
+       if (wd_resource_has_failed (ref) ) {
+               cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref);
+               log_printf (LOGSYS_LEVEL_CRIT,
+                       "watchdog resource \"%s\" failed!",
+                       ref->name);
+               return;
+       }
+       api->timer_add_duration((unsigned long long)ref->check_timeout*1000000000,
+               ref, wd_resource_check_fn, &ref->check_timer);
+}
+
+
+/*
+ * Start watching the objdb object resource_obj.
+ *
+ * Allocates a struct resource, sets up its state machine in
+ * WD_S_DISABLED, reads "poll_period" (creating the key from the default
+ * timeout when it is absent) and registers key-change and destroy
+ * tracking. When both the "recovery" and "state" keys exist the check
+ * timer is armed and the resource moves to WD_S_GOOD; otherwise it stays
+ * disabled until a later key change.
+ */
+static void wd_resource_create (hdb_handle_t resource_obj)
+{
+       int res;
+       size_t len = 0;
+       char *state;
+       char *period_value;
+       char period_buf[32];
+       long period;
+       time_t *last_updated;
+       objdb_value_types_t type;
+       struct resource *ref = malloc (sizeof (*ref));
+
+       if (ref == NULL) {
+               log_printf (LOGSYS_LEVEL_ERROR,
+                       "out of memory, resource will not be watched.");
+               return;
+       }
+
+       ref->handle = resource_obj;
+       ref->recovery = NULL;
+       ref->last_updated = 0;
+       ref->check_timeout = WD_DEFAULT_TIMEOUT;
+       ref->check_timer = NULL;
+       api->object_name_get (resource_obj,
+               ref->name,
+               &len);
+       /* never put the terminator outside of name[] */
+       if (len >= sizeof (ref->name)) {
+               len = sizeof (ref->name) - 1;
+       }
+       ref->name[len] = '\0';
+       ref->fsm.name = ref->name;
+       ref->fsm.table = wd_fsm_table;
+       ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry);
+       ref->fsm.curr_entry = 0;
+       ref->fsm.curr_state = WD_S_DISABLED;
+       ref->fsm.state_to_str = wd_res_state_to_str;
+       ref->fsm.event_to_str = wd_res_event_to_str;
+       api->object_priv_set (resource_obj, NULL);
+
+       /* object_key_get_typed hands back a pointer to the stored value,
+        * so the out argument has to be a pointer, not a char array */
+       res = api->object_key_get_typed (resource_obj,
+                       "poll_period",
+                       (void**)&period_value, &len,
+                       &type);
+       if (res != 0) {
+               log_printf (LOGSYS_LEVEL_ERROR, "%s : %d",__func__, res);
+               len = snprintf (period_buf, sizeof (period_buf), "%d",
+                       (int)ref->check_timeout);
+               api->object_key_create_typed (resource_obj,
+                       "poll_period", period_buf,
+                       len,
+                       OBJDB_VALUETYPE_STRING);
+       }
+       else {
+               /* the key is stored without its NUL (see above), so
+                * terminate a bounded copy before parsing it */
+               if (len >= sizeof (period_buf)) {
+                       len = sizeof (period_buf) - 1;
+               }
+               memcpy (period_buf, period_value, len);
+               period_buf[len] = '\0';
+               period = strtol (period_buf, NULL, 0);
+               if (period > 0 && period < 120) {
+                       ref->check_timeout = (period * 5)/4;
+               }
+       }
+
+       api->object_track_start (resource_obj, OBJECT_TRACK_DEPTH_ONE,
+                       wd_key_changed, NULL, wd_object_destroyed,
+                       NULL, ref);
+
+       res = api->object_key_get_typed (resource_obj,
+               "recovery", (void**)&ref->recovery, &len, &type);
+       if (res != 0) {
+               /* key does not exist.
+                */
+               log_printf (LOGSYS_LEVEL_WARNING,
+                       "resource %s missing a recovery key.", ref->name);
+               return;
+       }
+       res = api->object_key_get_typed (resource_obj,
+               "state", (void**)&state, &len, &type);
+       if (res != 0) {
+               /* key does not exist.
+                */
+               log_printf (LOGSYS_LEVEL_WARNING,
+                       "resource %s missing a state key.", ref->name);
+               return;
+       }
+
+       /* the call yields a pointer to the stored time_t; copy the value
+        * instead of storing the pointer in the time_t field */
+       res = api->object_key_get_typed (resource_obj,
+               "last_updated", (void**)&last_updated, &len, &type);
+       if (res == 0) {
+               ref->last_updated = *last_updated;
+       }
+
+       api->timer_add_duration((unsigned long long)ref->check_timeout*1000000000,
+               ref,
+               wd_resource_check_fn, &ref->check_timer);
+
+       cs_fsm_state_set(&ref->fsm, WD_S_GOOD, ref);
+}
+
+
+/*
+ * Timer callback that feeds the watchdog device and re-arms itself.
+ *
+ * The keep-alive is only sent while watchdog_ok is set. Once
+ * wd_resource_failed has cleared it the device is left to expire.
+ */
+static void wd_tickle_fn (void* arg)
+{
+       ENTER();
+
+       if (watchdog_ok) {
+               /* -1 is the "not open" marker, 0 is a valid descriptor */
+               if (dog >= 0)
+                       ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok);
+       }
+       else {
+               log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!");
+       }
+
+       api->timer_add_duration((unsigned long long)tickle_timeout*1000000000, NULL,
+                               wd_tickle_fn, &wd_timer);
+}
+
+/*
+ * objdb callback registered by wd_scan_resources on each resource type
+ * object: starts watching an object created below it.
+ */
+static void wd_resource_object_created(hdb_handle_t parent_object_handle,
+       hdb_handle_t object_handle,
+       const void *name_pt, size_t name_len,
+       void *priv_data_pt)
+{
+       hdb_handle_t new_resource = object_handle;
+
+       wd_resource_create (new_resource);
+}
+
+/*
+ * Walk resources/<type>/<resource> in the object database.
+ *
+ * Every object found two levels below "resources" is handed to
+ * wd_resource_create, and each <type> object is tracked so that objects
+ * created under it later reach wd_resource_object_created. Logs
+ * "no resources." and returns when "resources" does not exist.
+ */
+static void wd_scan_resources (void)
+{
+       hdb_handle_t top_finder;
+       hdb_handle_t type_finder;
+       hdb_handle_t child_finder;
+       hdb_handle_t type_obj;
+       hdb_handle_t child_obj;
+       int lookup;
+
+       ENTER();
+
+       api->object_find_create (OBJECT_PARENT_HANDLE,
+               "resources", strlen ("resources"), &top_finder);
+       lookup = api->object_find_next (top_finder, &resources_obj);
+       api->object_find_destroy (top_finder);
+       if (lookup != 0) {
+               log_printf (LOGSYS_LEVEL_INFO, "no resources.");
+               return;
+       }
+
+       /* first level: the system or process level */
+       api->object_find_create (resources_obj, NULL, 0, &type_finder);
+       for (;;) {
+               if (api->object_find_next (type_finder, &type_obj) != 0) {
+                       break;
+               }
+
+               /* second level: the resources themselves */
+               api->object_find_create (type_obj, NULL, 0, &child_finder);
+               for (;;) {
+                       if (api->object_find_next (child_finder, &child_obj) != 0) {
+                               break;
+                       }
+                       wd_resource_create (child_obj);
+               }
+               api->object_find_destroy (child_finder);
+
+               api->object_track_start (type_obj, OBJECT_TRACK_DEPTH_ONE,
+                       NULL, wd_resource_object_created, NULL,
+                       NULL, NULL);
+       }
+       api->object_find_destroy (type_finder);
+}
+
+
+/*
+ * Set the watchdog timeout to new seconds, clamped to 2..120.
+ *
+ * With the device open the value is written when the driver advertises
+ * WDIOF_SETTIMEOUT and the timeout actually in effect is read back.
+ * tickle_timeout is then set to half of the resulting watchdog_timeout.
+ */
+static void watchdog_timeout_apply (uint32_t new)
+{
+       struct watchdog_info ident;
+       int timeout;
+
+       if (new < 2) {
+               watchdog_timeout = 2;
+       }
+       else if (new > 120) {
+               watchdog_timeout = 120;
+       }
+       else {
+               watchdog_timeout = new;
+       }
+
+       /* -1 is the "not open" marker, 0 is a valid descriptor */
+       if (dog >= 0) {
+               /* the timeout ioctls exchange a plain int */
+               timeout = (int)watchdog_timeout;
+               /* only look at ident when the driver really filled it in */
+               if (ioctl(dog, WDIOC_GETSUPPORT, &ident) == 0 &&
+                       (ident.options & WDIOF_SETTIMEOUT)) {
+                       /* yay! the dog is trained.
+                        */
+                       ioctl(dog, WDIOC_SETTIMEOUT, &timeout);
+               }
+               if (ioctl(dog, WDIOC_GETTIMEOUT, &timeout) == 0 && timeout > 0) {
+                       watchdog_timeout = (uint32_t)timeout;
+               }
+       }
+       tickle_timeout = watchdog_timeout / 2;
+
+       log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds\n", watchdog_timeout);
+       log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %d seconds\n", tickle_timeout);
+}
+
+/*
+ * Open /dev/watchdog, apply the configured timeout and enable the card.
+ *
+ * Returns 0 with dog holding the open descriptor, or -1 (dog == -1)
+ * when the device is missing, not writable or cannot be opened.
+ */
+static int setup_watchdog(void)
+{
+       struct watchdog_info ident;
+       int have_ident;
+       /* WDIOC_SETOPTIONS reads its flags through a pointer */
+       int options = WDIOS_ENABLECARD;
+
+       ENTER();
+       if (access ("/dev/watchdog", W_OK) != 0) {
+               log_printf (LOGSYS_LEVEL_WARNING, "No Watchdog, try modprobe <a watchdog>");
+               dog = -1;
+               return -1;
+       }
+
+       /* here goes, lets hope they have "Magic Close"
+        */
+       dog = open("/dev/watchdog", O_WRONLY);
+
+       if (dog == -1) {
+               log_printf (LOGSYS_LEVEL_WARNING, "Watchdog exists but couldn't be opened.");
+               return -1;
+       }
+
+       /* Right we have the dog.
+        * Lets see what breed it is.
+        */
+
+       have_ident = (ioctl(dog, WDIOC_GETSUPPORT, &ident) == 0);
+       log_printf (LOGSYS_LEVEL_INFO, "Watchdog is now been tickled by corosync.");
+       if (have_ident) {
+               log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity);
+       }
+
+       watchdog_timeout_apply (watchdog_timeout);
+
+       ioctl(dog, WDIOC_SETOPTIONS, &options);
+
+       return 0;
+}
+
+/*
+ * objdb key-change callback for the top level "resources" object.
+ *
+ * Only the "watchdog_timeout" key is of interest: a new value is parsed
+ * and applied, deleting the key restores WD_DEFAULT_TIMEOUT. Changes to
+ * any other key are ignored so that they cannot reset the timeout.
+ */
+static void wd_top_level_key_changed(object_change_type_t change_type,
+       hdb_handle_t parent_object_handle,
+       hdb_handle_t object_handle,
+       const void *object_name_pt, size_t object_name_len,
+       const void *key_name_pt, size_t key_len,
+       const void *key_value_pt, size_t key_value_len,
+       void *priv_data_pt)
+{
+       static const char timeout_key[] = "watchdog_timeout";
+       const size_t timeout_key_len = sizeof (timeout_key) - 1;
+       char value_buf[32];
+       size_t value_len = key_value_len;
+       uint32_t tmp_value;
+
+       ENTER();
+       /* compare over the length of the key name, not of the value */
+       if (key_name_pt == NULL || key_len < timeout_key_len ||
+               strncmp ((const char*)key_name_pt, timeout_key, key_len) != 0) {
+               return;
+       }
+
+       if (change_type == OBJECT_KEY_DELETED || key_value_pt == NULL) {
+               watchdog_timeout_apply (WD_DEFAULT_TIMEOUT);
+       }
+       else {
+               /* the value comes with an explicit length, so terminate a
+                * bounded copy before parsing it */
+               if (value_len >= sizeof (value_buf)) {
+                       value_len = sizeof (value_buf) - 1;
+               }
+               memcpy (value_buf, key_value_pt, value_len);
+               value_buf[value_len] = '\0';
+               tmp_value = strtol (value_buf, NULL, 0);
+               watchdog_timeout_apply (tmp_value);
+       }
+       log_printf (LOGSYS_LEVEL_INFO, "new(%d) tickle_timeout: %d", change_type, tickle_timeout);
+}
+
+
+/*
+ * Read the initial watchdog timeout from the "watchdog_timeout" key of
+ * the "resources" object and apply it. When the key is absent the
+ * default is applied and the key is created with the resulting value.
+ * Finally key changes on "resources" are tracked through
+ * wd_top_level_key_changed.
+ */
+static void watchdog_timeout_get_initial (void)
+{
+       int32_t res;
+       char *stored_value;
+       char timeout_buf[32];
+       size_t timeout_len;
+       objdb_value_types_t timeout_type;
+       uint32_t tmp_value;
+
+       ENTER();
+
+       /* object_key_get_typed hands back a pointer to the stored value,
+        * so the out argument has to be a pointer, not a char array */
+       res = api->object_key_get_typed (resources_obj,
+                       "watchdog_timeout",
+                       (void**)&stored_value, &timeout_len,
+                       &timeout_type);
+       if (res != 0) {
+               watchdog_timeout_apply (WD_DEFAULT_TIMEOUT);
+
+               timeout_len = snprintf (timeout_buf, sizeof (timeout_buf), "%d", watchdog_timeout);
+               api->object_key_create_typed (resources_obj,
+                       "watchdog_timeout", timeout_buf,
+                       timeout_len,
+                       OBJDB_VALUETYPE_STRING);
+       }
+       else {
+               /* the key is stored without its NUL (see above), so
+                * terminate a bounded copy before parsing it */
+               if (timeout_len >= sizeof (timeout_buf)) {
+                       timeout_len = sizeof (timeout_buf) - 1;
+               }
+               memcpy (timeout_buf, stored_value, timeout_len);
+               timeout_buf[timeout_len] = '\0';
+               tmp_value = strtol (timeout_buf, NULL, 0);
+               watchdog_timeout_apply (tmp_value);
+       }
+
+       api->object_track_start (resources_obj, OBJECT_TRACK_DEPTH_ONE,
+               wd_top_level_key_changed, NULL, NULL,
+               NULL, NULL);
+
+}
+
+/*
+ * Start-up hook of the watchdog service.
+ *
+ * Stores the corosync API table, makes sure the "resources" object and
+ * its "system" and "process" children exist, reads the initial timeout,
+ * opens the watchdog device, starts watching the resources already in
+ * the object database and arms the first keep-alive timer.
+ *
+ * Always returns 0. A missing watchdog device is not treated as an error.
+ */
+static int wd_exec_init_fn (
+       struct corosync_api_v1 *corosync_api)
+{
+       hdb_handle_t obj;
+
+       ENTER();
+#ifdef COROSYNC_SOLARIS
+       logsys_subsys_init();
+#endif
+       api = corosync_api;
+
+       object_find_or_create (OBJECT_PARENT_HANDLE,
+               &resources_obj,
+               "resources", strlen ("resources"));
+       object_find_or_create (resources_obj,
+               &obj,
+               "system", strlen ("system"));
+       object_find_or_create (resources_obj,
+               &obj,
+               "process", strlen ("process"));
+
+       watchdog_timeout_get_initial();
+
+       setup_watchdog();
+
+       wd_scan_resources();
+
+       api->timer_add_duration((unsigned long long)tickle_timeout*1000000000, NULL,
+                               wd_tickle_fn, &wd_timer);
+
+       return 0;
+}
+
+/*
+ * Shutdown hook of the watchdog service.
+ *
+ * Writes the magic close character to the watchdog device and closes it
+ * straight away, so that a driver with "Magic Close" support disarms the
+ * timer instead of resetting the machine. Always returns 0.
+ */
+static int wd_exec_exit_fn (void)
+{
+       char magic = 'V';
+       ENTER();
+
+       /* -1 is the "not open" marker, 0 is a valid descriptor */
+       if (dog >= 0) {
+               log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog.");
+               if (write (dog, &magic, 1) != 1) {
+                       log_printf (LOGSYS_LEVEL_WARNING,
+                               "magic close failed, the watchdog may still fire.");
+               }
+               /* the magic character only counts when the close follows it */
+               close (dog);
+               dog = -1;
+       }
+       return 0;
+}
+
+
-- 
1.7.2.2

_______________________________________________
Openais mailing list
Openais@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to