Signed-off-by: Angus Salkeld <asalk...@redhat.com> --- configure.ac | 36 ++ corosync.spec.in | 14 + exec/Makefile.am | 2 +- exec/service.c | 12 + include/corosync/corodefs.h | 4 +- services/Makefile.am | 6 + services/mon.c | 635 ++++++++++++++++++++++++++++++++++++ services/wd.c | 755 +++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 1462 insertions(+), 2 deletions(-) create mode 100644 services/mon.c create mode 100644 services/wd.c
diff --git a/configure.ac b/configure.ac index b57fdd2..ad4b6c1 100644 --- a/configure.ac +++ b/configure.ac @@ -252,6 +252,16 @@ AC_ARG_ENABLE([rdma], [ enable_rdma="no" ]) AM_CONDITIONAL(BUILD_RDMA, test x$enable_rdma = xyes) +AC_ARG_ENABLE([monitoring], + [ --enable-monitoring : resource monitoring ],, + [ enable_monitoring="no" ]) +AM_CONDITIONAL(BUILD_MONITORING, test x$enable_monitoring = xyes) + +AC_ARG_ENABLE([watchdog], + [ --enable-watchdog : Watchdog support ],, + [ enable_watchdog="no" ]) +AM_CONDITIONAL(BUILD_WATCHDOG, test x$enable_watchdog = xyes) + AC_ARG_ENABLE([augeas], [ --enable-augeas : Install the augeas lens for corosync.conf ],, [ enable_augeas="no" ]) @@ -277,6 +287,7 @@ AC_ARG_WITH([socket-dir], # THIS SECTION MUST DIE! CP=cp OS_LDL="-ldl" +have_linux="no" case "$host_os" in *linux*) AC_DEFINE_UNQUOTED([COROSYNC_LINUX], [1], @@ -286,6 +297,7 @@ case "$host_os" in OS_LDFLAGS="" OS_DYFLAGS="-rdynamic" DARWIN_OPTS="" + have_linux="yes" ;; darwin*) AC_DEFINE_UNQUOTED([COROSYNC_DARWIN], [1], @@ -387,6 +399,30 @@ if test "x${enable_rdma}" = xyes; then PACKAGE_FEATURES="$PACKAGE_FEATURES rdma" fi +if test "x${enable_monitoring}" = xyes; then + + AC_CHECK_LIB([statgrab], [sg_get_mem_stats], have_libstatgrab="yes", have_libstatgrab="no") + + if test "x${have_libstatgrab}" = xyes; then + AC_DEFINE_UNQUOTED([HAVE_LIBSTATGRAB], 1, [have libstatgrab]) + statgrab_LIBS="-lstatgrab" + else + if test "x${have_linux}" = xno; then + AC_MSG_ERROR(monitoring requires libstatgrab on non-linux systems) + fi + fi + AC_SUBST([statgrab_LIBS]) + AC_DEFINE_UNQUOTED([HAVE_MONITORING], 1, [have resource monitoring]) + PACKAGE_FEATURES="$PACKAGE_FEATURES monitoring" +fi + +if test "x${enable_watchdog}" = xyes; then + AC_CHECK_HEADER(linux/watchdog.h,,AC_MSG_ERROR(watchdog requires linux/watchdog.h)) + AC_CHECK_HEADER(linux/reboot.h,,AC_MSG_ERROR(watchdog requires linux/reboot.h)) + AC_DEFINE_UNQUOTED([HAVE_WATCHDOG], 1, [have watchdog]) + PACKAGE_FEATURES="$PACKAGE_FEATURES watchdog" 
+fi + if test "x${enable_augeas}" = xyes; then PACKAGE_FEATURES="$PACKAGE_FEATURES augeas" fi diff --git a/corosync.spec.in b/corosync.spec.in index dafdb3c..ed531c3 100644 --- a/corosync.spec.in +++ b/corosync.spec.in @@ -5,6 +5,8 @@ # Invoke "rpmbuild --without <feature>" or "rpmbuild --with <feature>" # to disable or enable specific features %bcond_with testagents +%bcond_with watchdog +%bcond_with monitoring Name: corosync Summary: The Corosync Cluster Engine and Application Programming Interfaces @@ -52,6 +54,12 @@ export rdmacm_LIBS=-lrdmacm \ %if %{with testagents} --enable-testagents \ %endif +%if %{with watchdog} + --enable-watchdog \ +%endif +%if %{with monitoring} + --enable-monitoring \ +%endif --enable-rdma \ --with-initddir=%{_initrddir} @@ -115,6 +123,12 @@ fi %{_libexecdir}/lcrso/quorum_testquorum.lcrso %{_libexecdir}/lcrso/vsf_quorum.lcrso %{_libexecdir}/lcrso/vsf_ykd.lcrso +%if %{with watchdog} +%{_libexecdir}/lcrso/service_wd.lcrso +%endif +%if %{with monitoring} +%{_libexecdir}/lcrso/service_mon.lcrso +%endif %dir %{_localstatedir}/lib/corosync %dir %{_localstatedir}/log/cluster %{_mandir}/man8/corosync_overview.8* diff --git a/exec/Makefile.am b/exec/Makefile.am index a3a49bf..938237c 100644 --- a/exec/Makefile.am +++ b/exec/Makefile.am @@ -59,7 +59,7 @@ libcoroipcs_a_SOURCES = $(COROIPCS_SRC) corosync_SOURCES = main.c util.c sync.c apidef.c service.c \ timer.c totemconfig.c mainconfig.c quorum.c schedwrk.c \ ../lcr/lcr_ifact.c evil.c syncv2.c -corosync_LDADD = -ltotem_pg -llogsys -lcoroipcs +corosync_LDADD = -ltotem_pg -llogsys -lcoroipcs $(statgrab_LIBS) corosync_DEPENDENCIES = libtotem_pg.so.$(SONAME) liblogsys.so.$(SONAME) libcoroipcs.so.$(SONAME) corosync_LDFLAGS = $(OS_DYFLAGS) -L./ diff --git a/exec/service.c b/exec/service.c index be55459..dc30406 100644 --- a/exec/service.c +++ b/exec/service.c @@ -85,6 +85,18 @@ static struct default_service default_services[] = { .name = "corosync_pload", .ver = 0, }, +#ifdef HAVE_MONITORING + { + 
.name = "corosync_mon", + .ver = 0, + }, +#endif +#ifdef HAVE_WATCHDOG + { + .name = "corosync_wd", + .ver = 0, + }, +#endif { .name = "corosync_quorum", .ver = 0, diff --git a/include/corosync/corodefs.h b/include/corosync/corodefs.h index 57923e2..a1e6539 100644 --- a/include/corosync/corodefs.h +++ b/include/corosync/corodefs.h @@ -59,7 +59,9 @@ enum corosync_service_types { NTF_SERVICE = 16, AMF_V2_SERVICE = 17, TST_SV1_SERVICE = 18, - TST_SV2_SERVICE = 19 + TST_SV2_SERVICE = 19, + MON_SERVICE = 20, + WD_SERVICE = 21 }; #ifdef HAVE_SMALL_MEMORY_FOOTPRINT diff --git a/services/Makefile.am b/services/Makefile.am index cb64016..f39adc3 100644 --- a/services/Makefile.am +++ b/services/Makefile.am @@ -38,6 +38,12 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_srcdir)/include/corosync SERVICE_LCRSO = evs cfg cpg confdb pload +if BUILD_WATCHDOG +SERVICE_LCRSO += wd +endif +if BUILD_MONITORING +SERVICE_LCRSO += mon +endif QUORUM_LCRSO = votequorum testquorum diff --git a/services/mon.c b/services/mon.c new file mode 100644 index 0000000..3e475a1 --- /dev/null +++ b/services/mon.c @@ -0,0 +1,635 @@ +/* + * Copyright (c) 2010 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Angus Salkeld <asalk...@redhat.com> + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <config.h> + +#include <unistd.h> +#if defined(HAVE_LIBSTATGRAB) +#include <statgrab.h> +#endif + +#include <corosync/corotypes.h> +#include <corosync/corodefs.h> +#include <corosync/lcr/lcr_comp.h> +#include <corosync/engine/coroapi.h> +#include <corosync/list.h> +#include <corosync/totem/coropoll.h> +#include <corosync/engine/logsys.h> +#include "../exec/fsm.h" + + +LOGSYS_DECLARE_SUBSYS ("MON"); + +#undef ENTER +#define ENTER() log_printf (LOGSYS_LEVEL_INFO, "%s", __func__) + +/* + * Service Interfaces required by service_message_handler struct + */ +static int mon_exec_init_fn ( + struct corosync_api_v1 *corosync_api); + +hdb_handle_t mon_poll = 0; +static struct corosync_api_v1 *api; +static hdb_handle_t resources_obj; +static pthread_t mon_poll_thread; +#define MON_DEFAULT_PERIOD 3 + +struct corosync_service_engine mon_service_engine = { + .name = "corosync resource monitoring service", + .id = MON_SERVICE, + .priority = 1, + .private_data_size = 0, + .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED, + 
.lib_init_fn = NULL, + .lib_exit_fn = NULL, + .lib_engine = NULL, + .lib_engine_count = 0, + .exec_engine = NULL, + .exec_engine_count = 0, + .confchg_fn = NULL, + .exec_init_fn = mon_exec_init_fn, + .exec_dump_fn = NULL, + .sync_mode = CS_SYNC_V2 +}; + +static DECLARE_LIST_INIT (confchg_notify); + + +struct resource_instance { + hdb_handle_t handle; + const char *name; + poll_timer_handle timer_handle; + void (*update_stats_fn) (void *data); + struct cs_fsm fsm; + int32_t period; + objdb_value_types_t max_type; + union { + int32_t int32; + double dbl; + } max; +}; + +static void mem_update_stats_fn (void *data); +static void load_update_stats_fn (void *data); + +static struct resource_instance memory_used_inst = { + .name = "memory_used", + .update_stats_fn = mem_update_stats_fn, + .max_type = OBJDB_VALUETYPE_INT32, + .max.int32 = INT32_MAX, + .period = MON_DEFAULT_PERIOD, +}; + +static struct resource_instance load_15min_inst = { + .name = "load_15min", + .update_stats_fn = load_update_stats_fn, + .max_type = OBJDB_VALUETYPE_DOUBLE, + .max.dbl = INT32_MAX, + .period = MON_DEFAULT_PERIOD, +}; + + +/* + * F S M + */ +static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data); +static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data); + +const char * mon_ok_str = "ok"; +const char * mon_failed_str = "failed"; +const char * mon_failure_str = "failure"; +const char * mon_disabled_str = "disabled"; +const char * mon_config_changed_str = "config_changed"; + +enum mon_resource_state { + MON_S_DISABLED, + MON_S_OK, + MON_S_FAILED +}; +enum mon_resource_event { + MON_E_CONFIG_CHANGED, + MON_E_FAILURE +}; + +struct cs_fsm_entry mon_fsm_table[] = { + { MON_S_DISABLED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_DISABLED, MON_S_OK, -1} }, + { MON_S_DISABLED, MON_E_FAILURE, NULL, {-1} }, + { MON_S_OK, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_OK, MON_S_DISABLED, -1} }, + { MON_S_OK, MON_E_FAILURE, mon_resource_failed, 
{MON_S_FAILED, -1} }, + { MON_S_FAILED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_OK, MON_S_DISABLED, -1} }, + { MON_S_FAILED, MON_E_FAILURE, NULL, {-1} }, +}; + +/* + * Dynamic loading descriptor + */ + +static struct corosync_service_engine *mon_get_service_engine_ver0 (void); + +static struct corosync_service_engine_iface_ver0 mon_service_engine_iface = { + .corosync_get_service_engine_ver0 = mon_get_service_engine_ver0 +}; + +static struct lcr_iface corosync_mon_ver0[1] = { + { + .name = "corosync_mon", + .version = 0, + .versions_replace = 0, + .versions_replace_count = 0, + .dependencies = 0, + .dependency_count = 0, + .constructor = NULL, + .destructor = NULL, + .interfaces = NULL, + } +}; + +static struct lcr_comp mon_comp_ver0 = { + .iface_count = 1, + .ifaces = corosync_mon_ver0 +}; + +static struct corosync_service_engine *mon_get_service_engine_ver0 (void) +{ + return (&mon_service_engine); +} + +#ifdef COROSYNC_SOLARIS +void corosync_lcr_component_register (void); + +void corosync_lcr_component_register (void) { +#else +__attribute__ ((constructor)) static void corosync_lcr_component_register (void) { +#endif + lcr_interfaces_set (&corosync_mon_ver0[0], &mon_service_engine_iface); + + lcr_component_register (&mon_comp_ver0); +} + +static const char * mon_res_state_to_str(struct cs_fsm* fsm, + int32_t state) +{ + switch (state) { + case MON_S_DISABLED: + return mon_disabled_str; + break; + case MON_S_OK: + return mon_ok_str; + break; + case MON_S_FAILED: + return mon_failed_str; + break; + } + return NULL; +} + +static const char * mon_res_event_to_str(struct cs_fsm* fsm, + int32_t event) +{ + switch (event) { + case MON_E_CONFIG_CHANGED: + return mon_config_changed_str; + break; + case MON_E_FAILURE: + return mon_failure_str; + break; + } + return NULL; +} + +static void mon_fsm_state_set (struct cs_fsm* fsm, + enum mon_resource_state next_state, struct resource_instance* inst) +{ + enum mon_resource_state prev_state = fsm->curr_state; + const 
char *state_str; + + ENTER(); + + cs_fsm_state_set(fsm, next_state, inst); + + if (prev_state == fsm->curr_state) { + return; + } + state_str = mon_res_state_to_str(fsm, fsm->curr_state); + + api->object_key_replace (inst->handle, + "state", strlen ("state"), + state_str, strlen (state_str)); +} + + +static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data) +{ + struct resource_instance * inst = (struct resource_instance *)data; + char *str; + size_t str_len; + objdb_value_types_t type; + int32_t tmp_value; + int32_t res; + + ENTER(); + + res = api->object_key_get_typed (inst->handle, + "poll_period", + (void**)&str, &str_len, + &type); + if (res == 0) { + tmp_value = strtol (str, NULL, 0); + if (tmp_value > 0 && tmp_value < 120) { + if (inst->period != tmp_value) { + inst->period = tmp_value; + } + } + } + + res = api->object_key_get_typed (inst->handle, "max", + (void**)&str, &str_len, &type); + if (res != 0) { + if (inst->max_type == OBJDB_VALUETYPE_INT32) { + inst->max.int32 = INT32_MAX; + } else + if (inst->max_type == OBJDB_VALUETYPE_DOUBLE) { + inst->max.dbl = INT32_MAX; + } + mon_fsm_state_set (fsm, MON_S_DISABLED, inst); + } else { + if (inst->max_type == OBJDB_VALUETYPE_INT32) { + inst->max.int32 = strtol (str, NULL, 0); + } else + if (inst->max_type == OBJDB_VALUETYPE_DOUBLE) { + inst->max.dbl = strtod (str, NULL); + } + mon_fsm_state_set (fsm, MON_S_OK, inst); + } + + if (mon_poll == 0) { + return; + } + poll_timer_delete (mon_poll, inst->timer_handle); + /* + * run the updater, incase the period has shortened; the updater + * re-arms the timer itself with the new period, so no second + * poll_timer_add is issued here + */ + inst->update_stats_fn (inst); +} + +static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data) +{ + struct resource_instance * inst = (struct resource_instance *)data; + ENTER(); + mon_fsm_state_set (fsm, MON_S_FAILED, inst); +} + +static int32_t percent_mem_used_get(void) +{ +#if 
defined(HAVE_LIBSTATGRAB) + sg_mem_stats *mem_stats; + sg_swap_stats *swap_stats; + long long total, freemem; + + mem_stats = sg_get_mem_stats(); + swap_stats = sg_get_swap_stats(); + + if (mem_stats == NULL || swap_stats == NULL) { /* either query failed */ + log_printf (LOGSYS_LEVEL_ERROR, "Unable to get memory stats: %s\n", + sg_str_error(sg_get_error())); + return -1; + } + total = mem_stats->total + swap_stats->total; + freemem = mem_stats->free + swap_stats->free; + return ((total - freemem) * 100) / total; +#else +#if defined(COROSYNC_LINUX) + char *line_ptr; + char line[512]; + unsigned long long value; + FILE *f; + long long total = 0; + long long freemem = 0; + + if ((f = fopen("/proc/meminfo", "r")) == NULL) { + return -1; + } + + while ((line_ptr = fgets(line, sizeof(line), f)) != NULL) { + if (sscanf(line_ptr, "%*s %llu kB", &value) != 1) { + continue; + } + value *= 1024; + + if (strncmp(line_ptr, "MemTotal:", 9) == 0) { + total += value; + } else if (strncmp(line_ptr, "MemFree:", 8) == 0) { + freemem += value; + } else if (strncmp(line_ptr, "SwapTotal:", 10) == 0) { + total += value; + } else if (strncmp(line_ptr, "SwapFree:", 9) == 0) { + freemem += value; + } + } + + fclose(f); + return ((total - freemem) * 100) / total; +#else +#error need libstatgrab or linux. 
+#endif /* COROSYNC_LINUX */ +#endif /* HAVE_LIBSTATGRAB */ +} + + +static void mem_update_stats_fn (void *data) +{ + struct resource_instance * inst = (struct resource_instance *)data; + int32_t new_value; + time_t timestamp; /* same type as the value size written below */ + + new_value = percent_mem_used_get(); + if (new_value > 0) { + api->object_key_replace (inst->handle, + "current", strlen("current"), + &new_value, sizeof(new_value)); + + timestamp = time (NULL); + + api->object_key_replace (inst->handle, + "last_updated", strlen("last_updated"), + &timestamp, sizeof(timestamp)); + + if (new_value > inst->max.int32) { + cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst); + } + } + poll_timer_add (mon_poll, + inst->period * 1000, inst, + inst->update_stats_fn, + &inst->timer_handle); +} + +static double min15_loadavg_get(void) +{ +#if defined(HAVE_LIBSTATGRAB) + sg_load_stats *load_stats; + load_stats = sg_get_load_stats (); + if (load_stats == NULL) { + log_printf (LOGSYS_LEVEL_ERROR, "Unable to get load stats: %s\n", + sg_str_error (sg_get_error())); + return -1; + } + return load_stats->min15; +#else +#if defined(COROSYNC_LINUX) + double loadav[3]; + if (getloadavg(loadav,3) < 0) { + return -1; + } + return loadav[2]; +#else +#error need libstatgrab or linux. 
+#endif /* COROSYNC_LINUX */ +#endif /* HAVE_LIBSTATGRAB */ +} + +static void load_update_stats_fn (void *data) +{ + struct resource_instance * inst = (struct resource_instance *)data; + uint64_t timestamp; + int32_t res = 0; + double min15 = min15_loadavg_get(); + + if (min15 < 0) { + } + res = api->object_key_replace (inst->handle, + "current", strlen("current"), + &min15, sizeof (min15)); + if (res != 0) + log_printf (LOGSYS_LEVEL_ERROR, "replace current failed: %d", res); + + timestamp = cs_timestamp_get(); + + res = api->object_key_replace (inst->handle, + "last_updated", strlen("last_updated"), + &timestamp, sizeof(uint64_t)); + if (res != 0) + log_printf (LOGSYS_LEVEL_ERROR, "replace last_updated failed: %d", res); + + if (min15 > inst->max.dbl) { + cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst); /* the instance itself, not the address of the local pointer */ + } + + poll_timer_add (mon_poll, + inst->period * 1000, inst, + inst->update_stats_fn, + &inst->timer_handle); +} + +static void *mon_thread_handler (void * unused) +{ +#ifdef HAVE_LIBSTATGRAB + sg_init(); +#endif /* HAVE_LIBSTATGRAB */ + mon_poll = poll_create (); + + poll_timer_add (mon_poll, + memory_used_inst.period * 1000, + &memory_used_inst, + memory_used_inst.update_stats_fn, + &memory_used_inst.timer_handle); + + poll_timer_add (mon_poll, + load_15min_inst.period * 1000, + &load_15min_inst, + load_15min_inst.update_stats_fn, + &load_15min_inst.timer_handle); + poll_run (mon_poll); + + return NULL; +} + +static int object_find_or_create ( + hdb_handle_t parent_object_handle, + hdb_handle_t *object_handle, + const void *object_name, + size_t object_name_len) +{ + hdb_handle_t obj_finder; + hdb_handle_t obj; + int ret = -1; + + api->object_find_create ( + parent_object_handle, + object_name, + object_name_len, + &obj_finder); + + if (api->object_find_next (obj_finder, &obj) == 0) { + /* found it */ + *object_handle = obj; + ret = 0; + } + else { + ret = api->object_create (parent_object_handle, + object_handle, + object_name, object_name_len); + } + + 
api->object_find_destroy (obj_finder); + return ret; +} + +static void mon_key_change_notify (object_change_type_t change_type, + hdb_handle_t parent_object_handle, + hdb_handle_t object_handle, + const void *object_name_pt, size_t object_name_len, + const void *key_name_pt, size_t key_len, + const void *key_value_pt, size_t key_value_len, + void *priv_data_pt) +{ + struct resource_instance* inst = (struct resource_instance*)priv_data_pt; + + if ((strcmp ((char*)key_name_pt, "max") == 0) || + (strcmp ((char*)key_name_pt, "poll_period") == 0)) { + ENTER(); + cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst); + } +} + +static void mon_instance_init (hdb_handle_t parent, struct resource_instance* inst) +{ + int32_t res; + char mon_period_str[32]; /* buffer for the default written when the key is absent */ + char *stored_period; /* receives a pointer to the existing key value */ + size_t mon_period_len; + objdb_value_types_t mon_period_type; + int32_t tmp_value; + int32_t zero_32 = 0; + time_t zero_64 = 0; + double zero_double = 0; + + ENTER(); + + object_find_or_create (parent, + &inst->handle, + inst->name, strlen (inst->name)); + + if (inst->max_type == OBJDB_VALUETYPE_INT32) { + api->object_key_create_typed (inst->handle, + "current", &zero_32, + sizeof (zero_32), inst->max_type); + } else { + api->object_key_create_typed (inst->handle, + "current", &zero_double, + sizeof (zero_double), inst->max_type); + } + + api->object_key_create_typed (inst->handle, + "last_updated", &zero_64, + sizeof (time_t), OBJDB_VALUETYPE_INT64); + + api->object_key_create_typed (inst->handle, + "state", mon_disabled_str, strlen (mon_disabled_str), + OBJDB_VALUETYPE_STRING); + + inst->fsm.name = inst->name; + inst->fsm.curr_entry = 0; + inst->fsm.curr_state = MON_S_DISABLED; + inst->fsm.table = mon_fsm_table; + inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry); + inst->fsm.state_to_str = mon_res_state_to_str; + inst->fsm.event_to_str = mon_res_event_to_str; + + res = api->object_key_get_typed (inst->handle, + "poll_period", + (void**)&stored_period, &mon_period_len, + &mon_period_type); + 
if (res != 0) { + mon_period_len = snprintf (mon_period_str, 32, "%d", + inst->period); + api->object_key_create_typed (inst->handle, + "poll_period", &mon_period_str, + mon_period_len, + OBJDB_VALUETYPE_STRING); + } + else { + tmp_value = strtol (stored_period, NULL, 0); + if (tmp_value > 0 && tmp_value < 120) + inst->period = tmp_value; + } + cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst); + + poll_timer_add (mon_poll, + inst->period * 1000, inst, + inst->update_stats_fn, + &inst->timer_handle); + + /* pass inst as private data: mon_key_change_notify dereferences it */ + api->object_track_start (inst->handle, OBJECT_TRACK_DEPTH_ONE, + mon_key_change_notify, + NULL, NULL, NULL, inst); + +} + +static int mon_exec_init_fn ( + struct corosync_api_v1 *corosync_api) +{ + hdb_handle_t obj; + hdb_handle_t parent; + +#ifdef COROSYNC_SOLARIS + logsys_subsys_init(); +#endif + api = corosync_api; + ENTER(); + + object_find_or_create (OBJECT_PARENT_HANDLE, + &resources_obj, + "resources", strlen ("resources")); + + object_find_or_create (resources_obj, + &obj, + "system", strlen ("system")); + + parent = obj; + + mon_instance_init (parent, &memory_used_inst); + mon_instance_init (parent, &load_15min_inst); + + + pthread_create (&mon_poll_thread, NULL, mon_thread_handler, NULL); + + return 0; +} + + diff --git a/services/wd.c b/services/wd.c new file mode 100644 index 0000000..9c9ad97 --- /dev/null +++ b/services/wd.c @@ -0,0 +1,755 @@ +/* + * Copyright (c) 2010 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Angus Salkeld <asalk...@redhat.com> + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <config.h> + +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <linux/types.h> +#include <linux/watchdog.h> +#include <linux/reboot.h> + +#include <corosync/corotypes.h> +#include <corosync/corodefs.h> +#include <corosync/lcr/lcr_comp.h> +#include <corosync/engine/coroapi.h> +#include <corosync/list.h> +#include <corosync/engine/logsys.h> +#include "../exec/fsm.h" + + +typedef enum { + WD_RESOURCE_GOOD, + WD_RESOURCE_FAILED, + WD_RESOURCE_STATE_UNKNOWN, + WD_RESOURCE_NOT_MONITORED +} wd_resource_state_t; + +struct resource { + hdb_handle_t handle; + char *recovery; + char name[128]; + time_t last_updated; + struct cs_fsm fsm; + + corosync_timer_handle_t check_timer; + uint32_t check_timeout; +}; + +LOGSYS_DECLARE_SUBSYS("WD"); + +/* + * Service Interfaces required by service_message_handler struct + */ +static int wd_exec_init_fn ( + struct corosync_api_v1 *corosync_api); +static int wd_exec_exit_fn (void); +static void wd_resource_check_fn (void* resource_ref); + +static struct corosync_api_v1 *api; +#define WD_DEFAULT_TIMEOUT 6 +static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT; +static uint32_t tickle_timeout = (WD_DEFAULT_TIMEOUT / 2); +static int dog = -1; +static corosync_timer_handle_t wd_timer; +static hdb_handle_t resources_obj; +static int watchdog_ok = 1; + +struct corosync_service_engine wd_service_engine = { + .name = "corosync self-fencing service", + .id = WD_SERVICE, + .priority = 1, + .private_data_size = 0, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED, + .lib_init_fn = NULL, + .lib_exit_fn = NULL, + .lib_engine = NULL, + .lib_engine_count = 0, + .exec_engine = NULL, + .exec_engine_count = 0, + .confchg_fn = NULL, + .exec_init_fn = wd_exec_init_fn, + .exec_exit_fn = wd_exec_exit_fn, + .exec_dump_fn = NULL, + .sync_mode = CS_SYNC_V2 +}; + +static DECLARE_LIST_INIT (confchg_notify); + +/* + * F S M + */ +static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data); +static void 
wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data); + +enum wd_resource_state { + WD_S_GOOD, + WD_S_FAILED, + WD_S_DISABLED +}; + +enum wd_resource_event { + WD_E_FAILURE, + WD_E_CONFIG_CHANGED +}; + +const char * wd_ok_str = "ok"; +const char * wd_failed_str = "failed"; +const char * wd_failure_str = "failure"; +const char * wd_disabled_str = "disabled"; +const char * wd_config_changed_str = "config_changed"; + +struct cs_fsm_entry wd_fsm_table[] = { + { WD_S_DISABLED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_DISABLED, WD_S_GOOD, -1} }, + { WD_S_DISABLED, WD_E_FAILURE, NULL, {-1} }, + { WD_S_GOOD, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_GOOD, WD_S_DISABLED, -1} }, + { WD_S_GOOD, WD_E_FAILURE, wd_resource_failed, {WD_S_FAILED, -1} }, + { WD_S_FAILED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_GOOD, WD_S_DISABLED, -1} }, + { WD_S_FAILED, WD_E_FAILURE, NULL, {-1} }, +}; + +/* + * Dynamic loading descriptor + */ + +static struct corosync_service_engine *wd_get_service_engine_ver0 (void); + +static struct corosync_service_engine_iface_ver0 wd_service_engine_iface = { + .corosync_get_service_engine_ver0 = wd_get_service_engine_ver0 +}; + +static struct lcr_iface corosync_wd_ver0[1] = { + { + .name = "corosync_wd", + .version = 0, + .versions_replace = 0, + .versions_replace_count = 0, + .dependencies = 0, + .dependency_count = 0, + .constructor = NULL, + .destructor = NULL, + .interfaces = NULL, + } +}; + +static struct lcr_comp wd_comp_ver0 = { + .iface_count = 1, + .ifaces = corosync_wd_ver0 +}; + +static struct corosync_service_engine *wd_get_service_engine_ver0 (void) +{ + return (&wd_service_engine); +} + +#ifdef COROSYNC_SOLARIS +void corosync_lcr_component_register (void); + +void corosync_lcr_component_register (void) { +#else +__attribute__ ((constructor)) static void corosync_lcr_component_register (void) { +#endif + lcr_interfaces_set (&corosync_wd_ver0[0], &wd_service_engine_iface); + + lcr_component_register (&wd_comp_ver0); 
+} + +static int object_find_or_create ( + hdb_handle_t parent_object_handle, + hdb_handle_t *object_handle, + const void *object_name, + size_t object_name_len) +{ + hdb_handle_t obj_finder; + hdb_handle_t obj; + int ret = -1; + + api->object_find_create ( + parent_object_handle, + object_name, + object_name_len, + &obj_finder); + + if (api->object_find_next (obj_finder, &obj) == 0) { + /* found it */ + *object_handle = obj; + ret = 0; + } + else { + ret = api->object_create (parent_object_handle, + object_handle, + object_name, object_name_len); + } + + api->object_find_destroy (obj_finder); + return ret; +} + +static const char * wd_res_state_to_str(struct cs_fsm* fsm, + int32_t state) +{ + switch (state) { + case WD_S_DISABLED: + return wd_disabled_str; + break; + case WD_S_GOOD: + return wd_ok_str; + break; + case WD_S_FAILED: + return wd_failed_str; + break; + } + return NULL; +} + +static const char * wd_res_event_to_str(struct cs_fsm* fsm, + int32_t event) +{ + switch (event) { + case WD_E_CONFIG_CHANGED: + return wd_config_changed_str; + break; + case WD_E_FAILURE: + return wd_failure_str; + break; + } + return NULL; +} + +/* + * returns (0 == OK, 1 == failed) + */ +static int32_t wd_resource_has_failed (struct resource *ref) +{ + hdb_handle_t resource = ref->handle; + int res; + char* state; + size_t state_len; + objdb_value_types_t type; + time_t *last_updated; + time_t my_time; + size_t last_updated_len; + + res = api->object_key_get_typed (resource, + "last_updated", (void*)&last_updated, &last_updated_len, &type); + if (res != 0) { + /* key does not exist. + */ + return 1; + } + res = api->object_key_get_typed (resource, + "state", (void**)&state, &state_len, &type); + if (res != 0 || strncmp (state, "disabled", strlen ("disabled")) == 0) { + /* key does not exist. 
+ */ + return 1; + } + + my_time = time (NULL); + + if ((*last_updated + ref->check_timeout) < my_time) { + log_printf (LOGSYS_LEVEL_INFO, "delayed %ld + %d < %ld", + *last_updated, ref->check_timeout, my_time); + return 1; + } + + if ((*last_updated + ref->check_timeout) < my_time || + strcmp (state, "bad") == 0) { + return 1; + } + return 0; +} + +static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data) +{ + int res; + size_t len; + char *state; + objdb_value_types_t type; + char *mon_period_str; /* receives a pointer to the stored key value */ + int32_t tmp_value; + struct resource *ref = (struct resource*)data; + + res = api->object_key_get_typed (ref->handle, + "poll_period", + (void**)&mon_period_str, &len, + &type); + if (res == 0) { + tmp_value = strtol (mon_period_str, NULL, 0); + if (tmp_value > 0 && tmp_value < 120) + ref->check_timeout = (tmp_value * 5)/4; + } + + res = api->object_key_get_typed (ref->handle, + "recovery", (void*)&ref->recovery, &len, &type); + if (res != 0) { + /* key does not exist. + */ + log_printf (LOGSYS_LEVEL_WARNING, + "resource %s missing a recovery key.", ref->name); + cs_fsm_state_set(&ref->fsm, WD_S_DISABLED, ref); + return; + } + res = api->object_key_get_typed (ref->handle, + "state", (void*)&state, &len, &type); + if (res != 0) { + /* key does not exist. 
+ */ + log_printf (LOGSYS_LEVEL_WARNING, + "resource %s missing a state key.", ref->name); + cs_fsm_state_set(&ref->fsm, WD_S_DISABLED, ref); + return; + } + + cs_fsm_state_set(&ref->fsm, WD_S_GOOD, ref); + + if (ref->check_timer) { + api->timer_delete(ref->check_timer); + } + api->timer_add_duration((unsigned long long)ref->check_timeout*1000000000, + ref, + wd_resource_check_fn, &ref->check_timer); + +} + +static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data) +{ + struct resource* ref = (struct resource*)data; + + if (ref->check_timer) { + api->timer_delete(ref->check_timer); + } + + log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!", + ref->recovery, (char*)ref->name); + if (strcmp (ref->recovery, "watchdog") == 0 || + strcmp (ref->recovery, "quit") == 0) { + watchdog_ok = 0; + } + else if (strcmp (ref->recovery, "reboot") == 0) { + //reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART, NULL); + } + else if (strcmp (ref->recovery, "shutdown") == 0) { + //reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_POWER_OFF, NULL); + } + cs_fsm_state_set(fsm, WD_S_FAILED, data); +} + +static void wd_key_changed(object_change_type_t change_type, + hdb_handle_t parent_object_handle, + hdb_handle_t object_handle, + const void *object_name_pt, size_t object_name_len, + const void *key_name_pt, size_t key_len, + const void *key_value_pt, size_t key_value_len, + void *priv_data_pt) +{ + struct resource* ref = (struct resource*)priv_data_pt; + + if (strcmp(key_name_pt, "last_updated") == 0 || + strcmp(key_name_pt, "current") == 0) { + return; + } +// log_printf (LOGSYS_LEVEL_WARNING, +// "watchdog resource key changed: %s.%s=%s ref=%p.", +// (char*)object_name_pt, (char*)key_name_pt, (char*)key_value_pt, ref); + + if (ref == NULL) { + return; + } + cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref); +} + +static void wd_object_destroyed( + hdb_handle_t parent_object_handle, + const void *name_pt, size_t 
name_len, + void *priv_data_pt) +{ + struct resource* ref = (struct resource*)priv_data_pt; + + log_printf (LOGSYS_LEVEL_WARNING, + "watchdog resource \"%s\" deleted from objdb!", + (char*)name_pt); + + if (ref) { + api->timer_delete(ref->check_timer); + ref->check_timer = NULL; + } +} + +static void wd_resource_check_fn (void* resource_ref) +{ + struct resource* ref = (struct resource*)resource_ref; + + log_printf (LOGSYS_LEVEL_INFO, + "checking watchdog resource \"%s\".", + ref->name); + if (wd_resource_has_failed (ref) ) { + cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref); + log_printf (LOGSYS_LEVEL_CRIT, + "watchdog resource \"%s\" failed!", + (char*)ref->name); + return; + } + api->timer_add_duration((unsigned long long)ref->check_timeout*1000000000, + ref, wd_resource_check_fn, &ref->check_timer); +} + + +static void wd_resource_create (hdb_handle_t resource_obj) +{ + int res; + size_t len; + char *state; + objdb_value_types_t type; + char mon_period_str[32]; + int32_t tmp_value; + struct resource *ref = malloc (sizeof (struct resource)); + + ref->handle = resource_obj; + ref->check_timeout = WD_DEFAULT_TIMEOUT; + ref->check_timer = NULL; + api->object_name_get (resource_obj, + ref->name, + &len); + ref->name[len] = '\0'; + ref->fsm.name = ref->name; + ref->fsm.table = wd_fsm_table; + ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry); + ref->fsm.curr_entry = 0; + ref->fsm.curr_state = WD_S_DISABLED; + ref->fsm.state_to_str = wd_res_state_to_str; + ref->fsm.event_to_str = wd_res_event_to_str; + api->object_priv_set (resource_obj, NULL); + + res = api->object_key_get_typed (resource_obj, + "poll_period", + (void**)&mon_period_str, &len, + &type); + if (res != 0) { + log_printf (LOGSYS_LEVEL_ERROR, "%s : %d",__func__, res); + len = snprintf (mon_period_str, 32, "%d", ref->check_timeout); + api->object_key_create_typed (resource_obj, + "poll_period", &mon_period_str, + len, + OBJDB_VALUETYPE_STRING); + } + else { + tmp_value = strtol 
(mon_period_str, NULL, 0); + if (tmp_value > 0 && tmp_value < 120) + ref->check_timeout = (tmp_value * 5)/4; + } + + api->object_track_start (resource_obj, OBJECT_TRACK_DEPTH_ONE, + wd_key_changed, NULL, wd_object_destroyed, + NULL, ref); + + res = api->object_key_get_typed (resource_obj, + "recovery", (void*)&ref->recovery, &len, &type); + if (res != 0) { + /* key does not exist. + */ + log_printf (LOGSYS_LEVEL_WARNING, + "resource %s missing a recovery key.", ref->name); + return; + } + res = api->object_key_get_typed (resource_obj, + "state", (void*)&state, &len, &type); + if (res != 0) { + /* key does not exist. + */ + log_printf (LOGSYS_LEVEL_WARNING, + "resource %s missing a state key.", ref->name); + return; + } + + res = api->object_key_get_typed (resource_obj, + "last_updated", (void*)&ref->last_updated, &len, &type); + if (res != 0) { + /* key does not exist. + */ + ref->last_updated = 0; + } + + api->timer_add_duration((unsigned long long)ref->check_timeout*1000000000, + ref, + wd_resource_check_fn, &ref->check_timer); + + cs_fsm_state_set(&ref->fsm, WD_S_GOOD, ref); +} + + +static void wd_tickle_fn (void* arg) +{ + ENTER(); + + if (watchdog_ok) { + if (dog > 0) + ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok); + } + else { + log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!"); + } + + api->timer_add_duration((unsigned long long)tickle_timeout*1000000000, NULL, + wd_tickle_fn, &wd_timer); +} + +static void wd_resource_object_created(hdb_handle_t parent_object_handle, + hdb_handle_t object_handle, + const void *name_pt, size_t name_len, + void *priv_data_pt) +{ + wd_resource_create (object_handle); +} + +static void wd_scan_resources (void) +{ + hdb_handle_t obj_finder; + hdb_handle_t obj_finder2; + hdb_handle_t resource_type; + hdb_handle_t resource; + int res; + + ENTER(); + + api->object_find_create ( + OBJECT_PARENT_HANDLE, + "resources", strlen ("resources"), + &obj_finder); + + res = api->object_find_next (obj_finder, &resources_obj); + 
api->object_find_destroy (obj_finder); + if (res != 0) { + log_printf (LOGSYS_LEVEL_INFO, "no resources."); + return; + } + + /* this will be the system or process level + */ + api->object_find_create ( + resources_obj, + NULL, 0, + &obj_finder); + while (api->object_find_next (obj_finder, + &resource_type) == 0) { + + api->object_find_create ( + resource_type, + NULL, 0, + &obj_finder2); + + while (api->object_find_next (obj_finder2, + &resource) == 0) { + + wd_resource_create (resource); + } + api->object_find_destroy (obj_finder2); + + api->object_track_start (resource_type, OBJECT_TRACK_DEPTH_ONE, + NULL, wd_resource_object_created, NULL, + NULL, NULL); + } + api->object_find_destroy (obj_finder); +} + + +static void watchdog_timeout_apply (uint32_t new) +{ + struct watchdog_info ident; + + if (new < 2) { + watchdog_timeout = 2; + } + else if (new > 120) { + watchdog_timeout = 120; + } + else { + watchdog_timeout = new; + } + + if (dog > 0) { + ioctl(dog, WDIOC_GETSUPPORT, &ident); + if (ident.options & WDIOF_SETTIMEOUT) { + /* yay! the dog is trained. + */ + ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout); + } + ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout); + } + tickle_timeout = watchdog_timeout / 2; + + log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds\n", watchdog_timeout); + log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %d seconds\n", tickle_timeout); +} + +static int setup_watchdog(void) +{ + struct watchdog_info ident; + + ENTER(); + if (access ("/dev/watchdog", W_OK) != 0) { + log_printf (LOGSYS_LEVEL_WARNING, "No Watchdog, try modprobe <a watchdog>"); + dog = -1; + return -1; + } + + /* here goes, lets hope they have "Magic Close" + */ + dog = open("/dev/watchdog", O_WRONLY); + + if (dog == -1) { + log_printf (LOGSYS_LEVEL_WARNING, "Watchdog exists but couldn't be opened."); + dog = -1; + return -1; + } + + /* Right we have the dog. + * Lets see what breed it is. 
+ */ + + ioctl(dog, WDIOC_GETSUPPORT, &ident); + log_printf (LOGSYS_LEVEL_INFO, "Watchdog is now been tickled by corosync."); + log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity); + + watchdog_timeout_apply (watchdog_timeout); + + ioctl(dog, WDIOC_SETOPTIONS, WDIOS_ENABLECARD); + + return 0; +} + +static void wd_top_level_key_changed(object_change_type_t change_type, + hdb_handle_t parent_object_handle, + hdb_handle_t object_handle, + const void *object_name_pt, size_t object_name_len, + const void *key_name_pt, size_t key_len, + const void *key_value_pt, size_t key_value_len, + void *priv_data_pt) +{ + uint32_t tmp_value; + + ENTER(); + if (change_type != OBJECT_KEY_DELETED && + strncmp ((char*)key_name_pt, "watchdog_timeout", key_value_len) == 0) { + tmp_value = strtol (key_value_pt, NULL, 0); + watchdog_timeout_apply (tmp_value); + } + else { + watchdog_timeout_apply (WD_DEFAULT_TIMEOUT); + } + log_printf (LOGSYS_LEVEL_INFO, "new(%d) tickle_timeout: %d", change_type, tickle_timeout); +} + + +static void watchdog_timeout_get_initial (void) +{ + int32_t res; + char watchdog_timeout_str[32]; + size_t watchdog_timeout_len; + objdb_value_types_t watchdog_timeout_type; + uint32_t tmp_value; + + ENTER(); + + res = api->object_key_get_typed (resources_obj, + "watchdog_timeout", + (void**)&watchdog_timeout_str, &watchdog_timeout_len, + &watchdog_timeout_type); + if (res != 0) { + watchdog_timeout_apply (WD_DEFAULT_TIMEOUT); + + watchdog_timeout_len = snprintf (watchdog_timeout_str, 32, "%d", watchdog_timeout); + api->object_key_create_typed (resources_obj, + "watchdog_timeout", &watchdog_timeout_str, + watchdog_timeout_len, + OBJDB_VALUETYPE_STRING); + } + else { + tmp_value = strtol (watchdog_timeout_str, NULL, 0); + watchdog_timeout_apply (tmp_value); + } + + api->object_track_start (resources_obj, OBJECT_TRACK_DEPTH_ONE, + wd_top_level_key_changed, NULL, NULL, + NULL, NULL); + +} + +static int wd_exec_init_fn ( + struct corosync_api_v1 *corosync_api) +{ + 
hdb_handle_t obj; + + ENTER(); +#ifdef COROSYNC_SOLARIS + logsys_subsys_init(); +#endif + api = corosync_api; + + object_find_or_create (OBJECT_PARENT_HANDLE, + &resources_obj, + "resources", strlen ("resources")); + object_find_or_create (resources_obj, + &obj, + "system", strlen ("system")); + object_find_or_create (resources_obj, + &obj, + "process", strlen ("process")); + + watchdog_timeout_get_initial(); + + setup_watchdog(); + + wd_scan_resources(); + + api->timer_add_duration((unsigned long long)tickle_timeout*1000000000, NULL, + wd_tickle_fn, &wd_timer); + + return 0; +} + +static int wd_exec_exit_fn (void) +{ + char magic = 'V'; + ENTER(); + + if (dog > 0) { + log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog."); + write (dog, &magic, 1); + } + return 0; +} + + -- 1.7.2.2 _______________________________________________ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais