good for merge

On 09/23/2010 06:55 PM, Angus Salkeld wrote:
> - timestamps ->  uint64_t and in nanosecs
> - use clock_gettime
> - common object naming
> - common state names
> - timeouts in milliseconds
>
> Signed-off-by: Angus Salkeld <asalk...@redhat.com>
> ---
>   include/corosync/corotypes.h |   33 +++++-
>   lib/sam.c                    |   15 +--
>   man/sam_overview.8           |    4 +-
>   services/mon.c               |  237 ++++++++++++++++----------------
>   services/wd.c                |  314 ++++++++++++++++++++++++++----------------
>   test/testsam.c               |   43 +++---
>   6 files changed, 377 insertions(+), 269 deletions(-)
>
> diff --git a/include/corosync/corotypes.h b/include/corosync/corotypes.h
> index 57f8b47..dcb8325 100644
> --- a/include/corosync/corotypes.h
> +++ b/include/corosync/corotypes.h
> @@ -40,6 +40,8 @@
>   #else
>   #include<sys/types.h>
>   #endif
> +#include<time.h>
> +#include<sys/time.h>
>
>   typedef int64_t cs_time_t;
>
> @@ -47,6 +49,7 @@ typedef int64_t cs_time_t;
>   #define CS_TRUE !CS_FALSE
>   #define CS_MAX_NAME_LENGTH 256
>   #define CS_TIME_END    ((cs_time_t)0x7FFFFFFFFFFFFFFFULL)
> +#define CS_MAX(x, y) (((x)>  (y)) ? (x) : (y))
>
>   typedef struct {
>      uint16_t length;
> @@ -103,6 +106,33 @@ typedef enum {
>   } cs_error_t;
>
>
> +#define CS_TIME_MS_IN_SEC   1000ULL
> +#define CS_TIME_US_IN_SEC   1000000ULL
> +#define CS_TIME_NS_IN_SEC   1000000000ULL
> +#define CS_TIME_US_IN_MSEC  1000ULL
> +#define CS_TIME_NS_IN_MSEC  1000000ULL
> +#define CS_TIME_NS_IN_USEC  1000ULL
> +static inline uint64_t cs_timestamp_get(void)
> +{
> +     uint64_t result;
> +
> +#if defined _POSIX_MONOTONIC_CLOCK&&  _POSIX_MONOTONIC_CLOCK>= 0
> +     struct timespec ts;
> +
> +     clock_gettime (CLOCK_MONOTONIC,&ts);
> +     result = (ts.tv_sec * CS_TIME_NS_IN_SEC) + (uint64_t)ts.tv_nsec;
> +#else
> +     struct timeval time_from_epoch;
> +
> +     gettimeofday (&time_from_epoch, 0);
> +     result = ((time_from_epoch.tv_sec * CS_TIME_NS_IN_SEC) +
> +             (time_from_epoch.tv_usec * CS_TIME_NS_IN_USEC));
> +#endif
> +
> +     return result;
> +}
> +
> +
>   /*
>    * DEPRECATED
>    */
> @@ -177,4 +207,5 @@ typedef enum {
>   #define QUORUM_ERR_SECURITY                 CS_ERR_SECURITY
>   #define quorum_error_t cs_error_t
>
> -#endif
> +#endif /* COROTYPES_H_DEFINED */
> +
> diff --git a/lib/sam.c b/lib/sam.c
> index 53020ac..35bb7ee 100644
> --- a/lib/sam.c
> +++ b/lib/sam.c
> @@ -64,8 +64,8 @@
>   #include<signal.h>
>
>   #define SAM_CONFDB_S_FAILED         "failed"
> -#define SAM_CONFDB_S_REGISTERED              "registered"
> -#define SAM_CONFDB_S_STARTED         "started"
> +#define SAM_CONFDB_S_REGISTERED              "stopped"
> +#define SAM_CONFDB_S_STARTED         "running"
>   #define SAM_CONFDB_S_Q_WAIT         "waiting for quorum"
>
>   #define SAM_RP_MASK_Q(pol)  (pol&  (~SAM_RECOVERY_POLICY_QUORUM))
> @@ -145,7 +145,6 @@ static cs_error_t sam_confdb_update_key (enum 
> sam_confdb_key_t key, const char *
>       cs_error_t err;
>       const char *svalue;
>       uint64_t hc_period, last_hc;
> -     struct timeval tv;
>       const char *ssvalue[] = { [SAM_RECOVERY_POLICY_QUIT] = "quit", 
> [SAM_RECOVERY_POLICY_RESTART] = "restart" };
>
>       switch (key) {
> @@ -161,19 +160,15 @@ static cs_error_t sam_confdb_update_key (enum 
> sam_confdb_key_t key, const char *
>               hc_period = sam_internal_data.time_interval;
>
>               if ((err = confdb_key_create_typed 
> (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
> -                     "hc_period",&hc_period, sizeof (uint64_t), 
> CONFDB_VALUETYPE_UINT64)) != CS_OK) {
> +                     "poll_period",&hc_period, sizeof (hc_period), 
> CONFDB_VALUETYPE_UINT64)) != CS_OK) {
>                       goto exit_error;
>               }
>               break;
>       case SAM_CONFDB_KEY_LAST_HC:
> -             if (gettimeofday (&tv, NULL) == -1) {
> -                     last_hc = 0;
> -             } else {
> -                     last_hc = ((uint64_t)tv.tv_sec * 1000) + 
> ((uint64_t)tv.tv_usec / 1000);
> -             }
> +             last_hc = cs_timestamp_get();
>
>               if ((err = confdb_key_create_typed 
> (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
> -                     "hc_last",&last_hc, sizeof (uint64_t), 
> CONFDB_VALUETYPE_UINT64)) != CS_OK) {
> +                     "last_updated",&last_hc, sizeof (last_hc), 
> CONFDB_VALUETYPE_UINT64)) != CS_OK) {
>                       goto exit_error;
>               }
>               break;
> diff --git a/man/sam_overview.8 b/man/sam_overview.8
> index a5807cf..e00d2e8 100644
> --- a/man/sam_overview.8
> +++ b/man/sam_overview.8
> @@ -142,9 +142,9 @@ keys:
>   .IP \(bu 3
>   \fIrecovery\fR - will be quit or restart depending on policy
>   .IP \(bu 3
> -\fIhc_period\fR - period of health checking in milliseconds
> +\fIpoll_period\fR - period of health checking in milliseconds
>   .IP \(bu 3
> -\fIhc_last\fR - last known GMT time in milliseconds when health check was 
> received
> +\fIlast_updated\fR - Timestamp (in nanoseconds) of the last health check.
>   .IP \(bu 3
>   \fIstate\fR - state of process (can be one of registered, started, failed, 
> waiting for quorum)
>   .RE
> diff --git a/services/mon.c b/services/mon.c
> index 3e475a1..3060749 100644
> --- a/services/mon.c
> +++ b/services/mon.c
> @@ -44,7 +44,6 @@
>   #include<corosync/lcr/lcr_comp.h>
>   #include<corosync/engine/coroapi.h>
>   #include<corosync/list.h>
> -#include<corosync/totem/coropoll.h>
>   #include<corosync/engine/logsys.h>
>   #include "../exec/fsm.h"
>
> @@ -60,11 +59,11 @@ LOGSYS_DECLARE_SUBSYS ("MON");
>   static int mon_exec_init_fn (
>       struct corosync_api_v1 *corosync_api);
>
> -hdb_handle_t mon_poll = 0;
>   static struct corosync_api_v1 *api;
>   static hdb_handle_t resources_obj;
> -static pthread_t mon_poll_thread;
> -#define MON_DEFAULT_PERIOD 3
> +#define MON_DEFAULT_PERIOD 3000
> +#define MON_MIN_PERIOD 500
> +#define MON_MAX_PERIOD (120 * CS_TIME_MS_IN_SEC)
>
>   struct corosync_service_engine mon_service_engine = {
>       .name                   = "corosync resource monitoring service",
> @@ -90,10 +89,10 @@ static DECLARE_LIST_INIT (confchg_notify);
>   struct resource_instance {
>       hdb_handle_t handle;
>       const char *name;
> -     poll_timer_handle timer_handle;
> +     corosync_timer_handle_t timer_handle;
>       void (*update_stats_fn) (void *data);
>       struct cs_fsm fsm;
> -     int32_t period;
> +     uint64_t period;
>       objdb_value_types_t max_type;
>       union {
>               int32_t int32;
> @@ -127,15 +126,15 @@ static struct resource_instance load_15min_inst = {
>   static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * 
> data);
>   static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * 
> data);
>
> -const char * mon_ok_str = "ok";
> +const char * mon_running_str = "running";
>   const char * mon_failed_str = "failed";
>   const char * mon_failure_str = "failure";
> -const char * mon_disabled_str = "disabled";
> +const char * mon_stopped_str = "stopped";
>   const char * mon_config_changed_str = "config_changed";
>
>   enum mon_resource_state {
> -     MON_S_DISABLED,
> -     MON_S_OK,
> +     MON_S_STOPPED,
> +     MON_S_RUNNING,
>       MON_S_FAILED
>   };
>   enum mon_resource_event {
> @@ -144,12 +143,12 @@ enum mon_resource_event {
>   };
>
>   struct cs_fsm_entry mon_fsm_table[] = {
> -     { MON_S_DISABLED,       MON_E_CONFIG_CHANGED,   mon_config_changed,     
> {MON_S_DISABLED, MON_S_OK, -1} },
> -     { MON_S_DISABLED,       MON_E_FAILURE,          NULL,                   
> {-1} },
> -     { MON_S_OK,             MON_E_CONFIG_CHANGED,   mon_config_changed,     
> {MON_S_OK, MON_S_DISABLED, -1} },
> -     { MON_S_OK,             MON_E_FAILURE,          mon_resource_failed,    
> {MON_S_FAILED, -1} },
> -     { MON_S_FAILED,         MON_E_CONFIG_CHANGED,   mon_config_changed,     
> {MON_S_OK, MON_S_DISABLED, -1} },
> -     { MON_S_FAILED,         MON_E_FAILURE,          NULL,                   
> {-1} },
> +     { MON_S_STOPPED, MON_E_CONFIG_CHANGED,  mon_config_changed,     
> {MON_S_STOPPED, MON_S_RUNNING, -1} },
> +     { MON_S_STOPPED, MON_E_FAILURE,         NULL,                   {-1} },
> +     { MON_S_RUNNING, MON_E_CONFIG_CHANGED,  mon_config_changed,     
> {MON_S_RUNNING, MON_S_STOPPED, -1} },
> +     { MON_S_RUNNING, MON_E_FAILURE,         mon_resource_failed,    
> {MON_S_FAILED, -1} },
> +     { MON_S_FAILED,  MON_E_CONFIG_CHANGED,  mon_config_changed,     
> {MON_S_RUNNING, MON_S_STOPPED, -1} },
> +     { MON_S_FAILED,  MON_E_FAILURE,         NULL,                   {-1} },
>   };
>
>   /*
> @@ -202,11 +201,11 @@ static const char * mon_res_state_to_str(struct cs_fsm* 
> fsm,
>       int32_t state)
>   {
>       switch (state) {
> -     case MON_S_DISABLED:
> -             return mon_disabled_str;
> +     case MON_S_STOPPED:
> +             return mon_stopped_str;
>               break;
> -     case MON_S_OK:
> -             return mon_ok_str;
> +     case MON_S_RUNNING:
> +             return mon_running_str;
>               break;
>       case MON_S_FAILED:
>               return mon_failed_str;
> @@ -229,6 +228,24 @@ static const char * mon_res_event_to_str(struct cs_fsm* 
> fsm,
>       return NULL;
>   }
>
> +static cs_error_t str_to_uint64_t(const char* str, uint64_t *out_value, 
> uint64_t min, uint64_t max)
> +{
> +     char *endptr;
> +
> +     errno = 0;
> +        *out_value = strtol(str,&endptr, 0);
> +
> +        /* Check for various possible errors */
> +     if (errno != 0 || endptr == str) {
> +             return CS_ERR_INVALID_PARAM;
> +     }
> +
> +     if (*out_value>  max || *out_value<  min) {
> +             return CS_ERR_INVALID_PARAM;
> +     }
> +     return CS_OK;
> +}
> +
>   static void mon_fsm_state_set (struct cs_fsm* fsm,
>       enum mon_resource_state next_state, struct resource_instance* inst)
>   {
> @@ -256,7 +273,7 @@ static void mon_config_changed (struct cs_fsm* fsm, 
> int32_t event, void * data)
>       char *str;
>       size_t str_len;
>       objdb_value_types_t type;
> -     int32_t tmp_value;
> +     uint64_t tmp_value;
>       int32_t res;
>
>       ENTER();
> @@ -266,14 +283,22 @@ static void mon_config_changed (struct cs_fsm* fsm, 
> int32_t event, void * data)
>                       (void**)&str,&str_len,
>                       &type);
>       if (res == 0) {
> -             tmp_value = strtol (str, NULL, 0);
> -             if (tmp_value>  0&&  tmp_value<  120) {
> -                     if (inst->period != tmp_value) {
> -                             inst->period = tmp_value;
> -                     }
> +             if (str_to_uint64_t(str,&tmp_value, MON_MIN_PERIOD, 
> MON_MAX_PERIOD) == CS_OK) {
> +                     log_printf (LOGSYS_LEVEL_DEBUG,
> +                             "poll_period changing from:%"PRIu64" to 
> %"PRIu64".",
> +                             inst->period, tmp_value);
> +                     inst->period = tmp_value;
> +             } else {
> +                     log_printf (LOGSYS_LEVEL_WARNING,
> +                             "Could NOT use poll_period:%s ms for resource 
> %s",
> +                             str, inst->name);
>               }
>       }
>
> +     if (inst->timer_handle) {
> +             api->timer_delete(inst->timer_handle);
> +             inst->timer_handle = 0;
> +     }
>       res = api->object_key_get_typed (inst->handle, "max",
>                       (void**)&str,&str_len,&type);
>       if (res != 0) {
> @@ -283,7 +308,7 @@ static void mon_config_changed (struct cs_fsm* fsm, 
> int32_t event, void * data)
>               if (inst->max_type == OBJDB_VALUETYPE_DOUBLE) {
>                       inst->max.dbl = INT32_MAX;
>               }
> -             mon_fsm_state_set (fsm, MON_S_DISABLED, inst);
> +             mon_fsm_state_set (fsm, MON_S_STOPPED, inst);
>       } else {
>               if (inst->max_type == OBJDB_VALUETYPE_INT32) {
>                       inst->max.int32 = strtol (str, NULL, 0);
> @@ -291,21 +316,13 @@ static void mon_config_changed (struct cs_fsm* fsm, 
> int32_t event, void * data)
>               if (inst->max_type == OBJDB_VALUETYPE_DOUBLE) {
>                       inst->max.dbl = strtod (str, NULL);
>               }
> -             mon_fsm_state_set (fsm, MON_S_OK, inst);
> +             mon_fsm_state_set (fsm, MON_S_RUNNING, inst);
> +             /*
> +              * run the updater, in case the period has shortened
> +              * and to start the timer.
> +              */
> +             inst->update_stats_fn (inst);
>       }
> -
> -     if (mon_poll == 0) {
> -             return;
> -     }
> -     poll_timer_delete (mon_poll, inst->timer_handle);
> -     /*
> -      * run the updater, incase the period has shortened
> -      */
> -     inst->update_stats_fn (inst);
> -     poll_timer_add (mon_poll,
> -             inst->period * 1000, NULL,
> -             inst->update_stats_fn,
> -             &inst->timer_handle);
>   }
>
>   void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
> @@ -384,20 +401,18 @@ static void mem_update_stats_fn (void *data)
>                       "current", strlen("current"),
>                       &new_value, sizeof(new_value));
>
> -             timestamp = time (NULL);
> +             timestamp = cs_timestamp_get();
>
>               api->object_key_replace (inst->handle,
>                       "last_updated", strlen("last_updated"),
> -                     &timestamp, sizeof(time_t));
> +                     &timestamp, sizeof(uint64_t));
>
> -             if (new_value>  inst->max.int32) {
> +             if (new_value>  inst->max.int32&&  inst->fsm.curr_state != 
> MON_S_FAILED) {
>                       cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst);
>               }
>       }
> -     poll_timer_add (mon_poll,
> -             inst->period * 1000, inst,
> -             inst->update_stats_fn,
> -             &inst->timer_handle);
> +     api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS,
> +             inst, inst->update_stats_fn,&inst->timer_handle);
>   }
>
>   static double min15_loadavg_get(void)
> @@ -431,53 +446,28 @@ static void load_update_stats_fn (void *data)
>       int32_t res = 0;
>       double min15 = min15_loadavg_get();
>
> -     if (min15<  0) {
> -     }
> -     res = api->object_key_replace (inst->handle,
> -             "current", strlen("current"),
> -             &min15, sizeof (min15));
> -     if (res != 0)
> -             log_printf (LOGSYS_LEVEL_ERROR, "replace current failed: %d", 
> res);
> -
> -     timestamp = cs_timestamp_get();
> -
> -     res = api->object_key_replace (inst->handle,
> -             "last_updated", strlen("last_updated"),
> -             &timestamp, sizeof(uint64_t));
> -     if (res != 0)
> -             log_printf (LOGSYS_LEVEL_ERROR, "replace last_updated failed: 
> %d", res);
> -
> -     if (min15>  inst->max.dbl) {
> -             cs_fsm_process (&inst->fsm, MON_E_FAILURE,&inst);
> -     }
> -
> -     poll_timer_add (mon_poll,
> -             inst->period * 1000, inst,
> -             inst->update_stats_fn,
> -             &inst->timer_handle);
> -}
> -
> -static void *mon_thread_handler (void * unused)
> -{
> -#ifdef HAVE_LIBSTATGRAB
> -     sg_init();
> -#endif /* HAVE_LIBSTATGRAB */
> -     mon_poll = poll_create ();
> -
> -     poll_timer_add (mon_poll,
> -             memory_used_inst.period * 1000,
> -             &memory_used_inst,
> -             memory_used_inst.update_stats_fn,
> -             &memory_used_inst.timer_handle);
> +     if (min15>  0) {
> +             res = api->object_key_replace (inst->handle,
> +                     "current", strlen("current"),
> +                     &min15, sizeof (min15));
> +             if (res != 0) {
> +                     log_printf (LOGSYS_LEVEL_ERROR, "replace current 
> failed: %d", res);
> +             }
> +             timestamp = cs_timestamp_get();
>
> -     poll_timer_add (mon_poll,
> -             load_15min_inst.period * 1000,
> -             &load_15min_inst,
> -             load_15min_inst.update_stats_fn,
> -             &load_15min_inst.timer_handle);
> -     poll_run (mon_poll);
> +             res = api->object_key_replace (inst->handle,
> +                     "last_updated", strlen("last_updated"),
> +                     &timestamp, sizeof(uint64_t));
> +             if (res != 0) {
> +                     log_printf (LOGSYS_LEVEL_ERROR, "replace last_updated 
> failed: %d", res);
> +             }
> +             if (min15>  inst->max.dbl&&  inst->fsm.curr_state != 
> MON_S_FAILED) {
> +                     cs_fsm_process (&inst->fsm, MON_E_FAILURE,&inst);
> +             }
> +     }
>
> -     return NULL;
> +     api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS,
> +             inst, inst->update_stats_fn,&inst->timer_handle);
>   }
>
>   static int object_find_or_create (
> @@ -511,6 +501,23 @@ static int object_find_or_create (
>       return ret;
>   }
>
> +static void mon_object_destroyed(
> +     hdb_handle_t parent_object_handle,
> +     const void *name_pt, size_t name_len,
> +     void *priv_data_pt)
> +{
> +     struct resource_instance* inst = (struct 
> resource_instance*)priv_data_pt;
> +
> +     if (inst) {
> +             log_printf (LOGSYS_LEVEL_WARNING,
> +                     "resource \"%s\" deleted from objdb!",
> +                     inst->name);
> +
> +             cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
> +     }
> +}
> +
> +
>   static void mon_key_change_notify (object_change_type_t change_type,
>       hdb_handle_t parent_object_handle,
>       hdb_handle_t object_handle,
> @@ -521,8 +528,8 @@ static void mon_key_change_notify (object_change_type_t 
> change_type,
>   {
>       struct resource_instance* inst = (struct 
> resource_instance*)priv_data_pt;
>
> -     if ((strcmp ((char*)key_name_pt, "max") == 0) ||
> -             (strcmp ((char*)key_name_pt, "poll_period") == 0)) {
> +     if ((strncmp ((char*)key_name_pt, "max", key_len) == 0) ||
> +             (strncmp ((char*)key_name_pt, "poll_period", key_len) == 0)) {
>               ENTER();
>               cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
>       }
> @@ -532,15 +539,14 @@ static void mon_instance_init (hdb_handle_t parent, 
> struct resource_instance* in
>   {
>       int32_t res;
>       char mon_period_str[32];
> +     char *str;
>       size_t mon_period_len;
>       objdb_value_types_t mon_period_type;
> -     int32_t tmp_value;
> +     uint64_t tmp_value;
>       int32_t zero_32 = 0;
>       time_t zero_64 = 0;
>       double zero_double = 0;
>
> -     ENTER();
> -
>       object_find_or_create (parent,
>               &inst->handle,
>               inst->name, strlen (inst->name));
> @@ -557,15 +563,15 @@ static void mon_instance_init (hdb_handle_t parent, 
> struct resource_instance* in
>
>       api->object_key_create_typed (inst->handle,
>               "last_updated",&zero_64,
> -             sizeof (time_t), OBJDB_VALUETYPE_INT64);
> +             sizeof (uint64_t), OBJDB_VALUETYPE_UINT64);
>
>       api->object_key_create_typed (inst->handle,
> -             "state", mon_disabled_str, strlen (mon_disabled_str),
> +             "state", mon_stopped_str, strlen (mon_stopped_str),
>               OBJDB_VALUETYPE_STRING);
>
>       inst->fsm.name = inst->name;
>       inst->fsm.curr_entry = 0;
> -     inst->fsm.curr_state = MON_S_DISABLED;
> +     inst->fsm.curr_state = MON_S_STOPPED;
>       inst->fsm.table = mon_fsm_table;
>       inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry);
>       inst->fsm.state_to_str = mon_res_state_to_str;
> @@ -573,10 +579,10 @@ static void mon_instance_init (hdb_handle_t parent, 
> struct resource_instance* in
>
>       res = api->object_key_get_typed (inst->handle,
>                       "poll_period",
> -                     (void**)&mon_period_str,&mon_period_len,
> +                     (void**)&str,&mon_period_len,
>                       &mon_period_type);
>       if (res != 0) {
> -             mon_period_len = snprintf (mon_period_str, 32, "%d",
> +             mon_period_len = snprintf (mon_period_str, 32, "%"PRIu64"",
>                       inst->period);
>               api->object_key_create_typed (inst->handle,
>                       "poll_period",&mon_period_str,
> @@ -584,20 +590,19 @@ static void mon_instance_init (hdb_handle_t parent, 
> struct resource_instance* in
>                       OBJDB_VALUETYPE_STRING);
>       }
>       else {
> -             tmp_value = strtol (mon_period_str, NULL, 0);
> -             if (tmp_value>  0&&  tmp_value<  120)
> +             if (str_to_uint64_t(str,&tmp_value, MON_MIN_PERIOD, 
> MON_MAX_PERIOD) == CS_OK) {
>                       inst->period = tmp_value;
> +             } else {
> +                     log_printf (LOGSYS_LEVEL_WARNING,
> +                             "Could NOT use poll_period:%s ms for resource 
> %s",
> +                             str, inst->name);
> +             }
>       }
>       cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
>
> -     poll_timer_add (mon_poll,
> -             inst->period * 1000, inst,
> -             inst->update_stats_fn,
> -             &inst->timer_handle);
> -
> -     api->object_track_start (inst->handle, OBJECT_TRACK_DEPTH_ONE,
> +     api->object_track_start (inst->handle, OBJECT_TRACK_DEPTH_RECURSIVE,
>               mon_key_change_notify,
> -             NULL, NULL, NULL, NULL);
> +             NULL, mon_object_destroyed, NULL, inst);
>
>   }
>
> @@ -607,11 +612,14 @@ static int mon_exec_init_fn (
>       hdb_handle_t obj;
>       hdb_handle_t parent;
>
> +#ifdef HAVE_LIBSTATGRAB
> +     sg_init();
> +#endif /* HAVE_LIBSTATGRAB */
> +
>   #ifdef COROSYNC_SOLARIS
>       logsys_subsys_init();
>   #endif
>       api = corosync_api;
> -     ENTER();
>
>       object_find_or_create (OBJECT_PARENT_HANDLE,
>               &resources_obj,
> @@ -626,9 +634,6 @@ static int mon_exec_init_fn (
>       mon_instance_init (parent,&memory_used_inst);
>       mon_instance_init (parent,&load_15min_inst);
>
> -
> -     pthread_create (&mon_poll_thread, NULL, mon_thread_handler, NULL);
> -
>       return 0;
>   }
>
> diff --git a/services/wd.c b/services/wd.c
> index 9c9ad97..cd0729f 100644
> --- a/services/wd.c
> +++ b/services/wd.c
> @@ -39,7 +39,7 @@
>   #include<sys/ioctl.h>
>   #include<linux/types.h>
>   #include<linux/watchdog.h>
> -#include<linux/reboot.h>
> +#include<sys/reboot.h>
>
>   #include<corosync/corotypes.h>
>   #include<corosync/corodefs.h>
> @@ -60,12 +60,12 @@ typedef enum {
>   struct resource {
>       hdb_handle_t handle;
>       char *recovery;
> -     char name[128];
> +     char name[CS_MAX_NAME_LENGTH];
>       time_t last_updated;
>       struct cs_fsm fsm;
>
>       corosync_timer_handle_t check_timer;
> -     uint32_t check_timeout;
> +     uint64_t check_timeout;
>   };
>
>   LOGSYS_DECLARE_SUBSYS("WD");
> @@ -79,20 +79,23 @@ static int wd_exec_exit_fn (void);
>   static void wd_resource_check_fn (void* resource_ref);
>
>   static struct corosync_api_v1 *api;
> -#define WD_DEFAULT_TIMEOUT 6
> -static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT;
> -static uint32_t tickle_timeout = (WD_DEFAULT_TIMEOUT / 2);
> +#define WD_DEFAULT_TIMEOUT_SEC 6
> +#define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC)
> +#define WD_MIN_TIMEOUT_MS 500
> +#define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC)
> +static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT_SEC;
> +static uint64_t tickle_timeout = (WD_DEFAULT_TIMEOUT_MS / 2);
>   static int dog = -1;
>   static corosync_timer_handle_t wd_timer;
>   static hdb_handle_t resources_obj;
>   static int watchdog_ok = 1;
>
>   struct corosync_service_engine wd_service_engine = {
> -     .name                   = "corosync self-fencing service",
> +     .name                   = "corosync watchdog service",
>       .id                     = WD_SERVICE,
>       .priority               = 1,
>       .private_data_size      = 0,
> -     .flow_control           = CS_LIB_FLOW_CONTROL_REQUIRED,
> +     .flow_control           = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
>       .lib_init_fn            = NULL,
>       .lib_exit_fn            = NULL,
>       .lib_engine             = NULL,
> @@ -115,9 +118,9 @@ static void wd_config_changed (struct cs_fsm* fsm, 
> int32_t event, void * data);
>   static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * 
> data);
>
>   enum wd_resource_state {
> -     WD_S_GOOD,
> +     WD_S_RUNNING,
>       WD_S_FAILED,
> -     WD_S_DISABLED
> +     WD_S_STOPPED
>   };
>
>   enum wd_resource_event {
> @@ -125,19 +128,19 @@ enum wd_resource_event {
>       WD_E_CONFIG_CHANGED
>   };
>
> -const char * wd_ok_str                       = "ok";
> +const char * wd_running_str          = "running";
>   const char * wd_failed_str          = "failed";
>   const char * wd_failure_str         = "failure";
> -const char * wd_disabled_str         = "disabled";
> +const char * wd_stopped_str          = "stopped";
>   const char * wd_config_changed_str  = "config_changed";
>
>   struct cs_fsm_entry wd_fsm_table[] = {
> -     { WD_S_DISABLED,        WD_E_CONFIG_CHANGED,    wd_config_changed,      
> {WD_S_DISABLED, WD_S_GOOD, -1} },
> -     { WD_S_DISABLED,        WD_E_FAILURE,           NULL,                   
> {-1} },
> -     { WD_S_GOOD,            WD_E_CONFIG_CHANGED,    wd_config_changed,      
> {WD_S_GOOD, WD_S_DISABLED, -1} },
> -     { WD_S_GOOD,            WD_E_FAILURE,           wd_resource_failed,     
> {WD_S_FAILED, -1} },
> -     { WD_S_FAILED,          WD_E_CONFIG_CHANGED,    wd_config_changed,      
> {WD_S_GOOD, WD_S_DISABLED, -1} },
> -     { WD_S_FAILED,          WD_E_FAILURE,           NULL,                   
> {-1} },
> +     { WD_S_STOPPED, WD_E_CONFIG_CHANGED,    wd_config_changed,      
> {WD_S_STOPPED, WD_S_RUNNING, -1} },
> +     { WD_S_STOPPED, WD_E_FAILURE,           NULL,                   {-1} },
> +     { WD_S_RUNNING, WD_E_CONFIG_CHANGED,    wd_config_changed,      
> {WD_S_RUNNING, WD_S_STOPPED, -1} },
> +     { WD_S_RUNNING, WD_E_FAILURE,           wd_resource_failed,     
> {WD_S_FAILED, -1} },
> +     { WD_S_FAILED,  WD_E_CONFIG_CHANGED,    wd_config_changed,      
> {WD_S_RUNNING, WD_S_STOPPED, -1} },
> +     { WD_S_FAILED,  WD_E_FAILURE,           NULL,                   {-1} },
>   };
>
>   /*
> @@ -217,15 +220,33 @@ static int object_find_or_create (
>       return ret;
>   }
>
> +static cs_error_t str_to_uint64_t(const char* str, uint64_t *out_value, 
> uint64_t min, uint64_t max)
> +{
> +     char *endptr;
> +
> +     errno = 0;
> +        *out_value = strtol(str,&endptr, 0);
> +
> +        /* Check for various possible errors */
> +     if (errno != 0 || endptr == str) {
> +             return CS_ERR_INVALID_PARAM;
> +     }
> +
> +     if (*out_value>  max || *out_value<  min) {
> +             return CS_ERR_INVALID_PARAM;
> +     }
> +     return CS_OK;
> +}
> +
>   static const char * wd_res_state_to_str(struct cs_fsm* fsm,
>       int32_t state)
>   {
>       switch (state) {
> -     case WD_S_DISABLED:
> -             return wd_disabled_str;
> +     case WD_S_STOPPED:
> +             return wd_stopped_str;
>               break;
> -     case WD_S_GOOD:
> -             return wd_ok_str;
> +     case WD_S_RUNNING:
> +             return wd_running_str;
>               break;
>       case WD_S_FAILED:
>               return wd_failed_str;
> @@ -249,17 +270,18 @@ static const char * wd_res_event_to_str(struct cs_fsm* 
> fsm,
>   }
>
>   /*
> - * returns (0 == OK, 1 == failed)
> + * returns (CS_TRUE == OK, CS_FALSE == failed)
>    */
> -static int32_t wd_resource_has_failed (struct resource *ref)
> +static int32_t wd_resource_state_is_ok (struct resource *ref)
>   {
>       hdb_handle_t resource = ref->handle;
>       int res;
>       char* state;
>       size_t state_len;
>       objdb_value_types_t type;
> -     time_t *last_updated;
> -     time_t my_time;
> +     uint64_t *last_updated;
> +     uint64_t my_time;
> +     uint64_t allowed_period;
>       size_t last_updated_len;
>
>       res = api->object_key_get_typed (resource,
> @@ -267,29 +289,39 @@ static int32_t wd_resource_has_failed (struct resource 
> *ref)
>       if (res != 0) {
>               /* key does not exist.
>               */
> -             return 1;
> +             return CS_FALSE;
>       }
>       res = api->object_key_get_typed (resource,
>               "state", (void**)&state,&state_len,&type);
>       if (res != 0 || strncmp (state, "disabled", strlen ("disabled")) == 0) {
>               /* key does not exist.
>               */
> -             return 1;
> +             return CS_FALSE;
> +     }
> +     if (*last_updated == 0) {
> +             /* initial value */
> +             return CS_TRUE;
>       }
>
> -     my_time = time (NULL);
> +     my_time = cs_timestamp_get();
>
> -     if ((*last_updated + ref->check_timeout)<  my_time) {
> -             log_printf (LOGSYS_LEVEL_INFO, "delayed %ld + %d<  %ld",
> -                     *last_updated, ref->check_timeout, my_time);
> -             return 1;
> +     /*
> +      * Here we check that the monitor has written a timestamp within the 
> poll_period
> +      * plus a grace factor of (0.5 * poll_period).
> +      */
> +     allowed_period = (ref->check_timeout * MILLI_2_NANO_SECONDS * 3) / 2;
> +     if ((*last_updated + allowed_period)<  my_time) {
> +             log_printf (LOGSYS_LEVEL_ERROR,
> +                     "last_updated %"PRIu64" ms too late, period:%"PRIu64".",
> +                     my_time/MILLI_2_NANO_SECONDS - ((*last_updated + 
> allowed_period) / MILLI_2_NANO_SECONDS),
> +                     ref->check_timeout);
> +             return CS_FALSE;
>       }
>
> -     if ((*last_updated + ref->check_timeout)<  my_time ||
> -             strcmp (state, "bad") == 0) {
> -             return 1;
> +     if (strcmp (state, wd_failed_str) == 0) {
> +             return CS_FALSE;
>       }
> -     return 0;
> +     return CS_TRUE;
>   }
>
>   static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * 
> data)
> @@ -298,18 +330,34 @@ static void wd_config_changed (struct cs_fsm* fsm, 
> int32_t event, void * data)
>       size_t len;
>       char *state;
>       objdb_value_types_t type;
> -     char mon_period_str[32];
> -     int32_t tmp_value;
> +     char *str;
> +     uint64_t tmp_value;
> +     uint64_t next_timeout;
>       struct resource *ref = (struct resource*)data;
>
> +     next_timeout = ref->check_timeout;
> +
>       res = api->object_key_get_typed (ref->handle,
>                       "poll_period",
> -                     (void**)&mon_period_str,&len,
> +                     (void**)&str,&len,
>                       &type);
>       if (res == 0) {
> -             tmp_value = strtol (mon_period_str, NULL, 0);
> -             if (tmp_value>  0&&  tmp_value<  120)
> -                     ref->check_timeout = (tmp_value * 5)/4;
> +             if (str_to_uint64_t(str,&tmp_value, WD_MIN_TIMEOUT_MS, 
> WD_MAX_TIMEOUT_MS) == CS_OK) {
> +                     log_printf (LOGSYS_LEVEL_DEBUG,
> +                             "poll_period changing from:%"PRIu64" to 
> %"PRIu64".",
> +                             ref->check_timeout, tmp_value);
> +                     /*
> +                      * To ease the transition between poll_periods we are going
> +                      * to make the first timeout the bigger of the new and old value.
> +                      * This is to give the monitoring system time to adjust.
> +                      */
> +                     next_timeout = CS_MAX(tmp_value, ref->check_timeout);
> +                     ref->check_timeout = tmp_value;
> +             } else {
> +                     log_printf (LOGSYS_LEVEL_WARNING,
> +                             "Could NOT use poll_period:%s ms for resource 
> %s",
> +                             str, ref->name);
> +             }
>       }
>
>       res = api->object_key_get_typed (ref->handle,
> @@ -319,7 +367,7 @@ static void wd_config_changed (struct cs_fsm* fsm, 
> int32_t event, void * data)
>                */
>               log_printf (LOGSYS_LEVEL_WARNING,
>                       "resource %s missing a recovery key.", ref->name);
> -             cs_fsm_state_set(&ref->fsm, WD_S_DISABLED, ref);
> +             cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
>               return;
>       }
>       res = api->object_key_get_typed (ref->handle,
> @@ -329,19 +377,21 @@ static void wd_config_changed (struct cs_fsm* fsm, 
> int32_t event, void * data)
>               */
>               log_printf (LOGSYS_LEVEL_WARNING,
>                       "resource %s missing a state key.", ref->name);
> -             cs_fsm_state_set(&ref->fsm, WD_S_DISABLED, ref);
> +             cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
>               return;
>       }
> -
> -     cs_fsm_state_set(&ref->fsm, WD_S_GOOD, ref);
> -
>       if (ref->check_timer) {
>               api->timer_delete(ref->check_timer);
> +             ref->check_timer = NULL;
>       }
> -     api->timer_add_duration((unsigned long 
> long)ref->check_timeout*1000000000,
> -             ref,
> -             wd_resource_check_fn,&ref->check_timer);
>
> +     if (strcmp(wd_stopped_str, state) == 0) {
> +             cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref);
> +     } else {
> +             api->timer_add_duration(next_timeout * MILLI_2_NANO_SECONDS,
> +                     ref, wd_resource_check_fn,&ref->check_timer);
> +             cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref);
> +     }
>   }
>
>   static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * 
> data)
> @@ -350,6 +400,7 @@ static void wd_resource_failed (struct cs_fsm* fsm, 
> int32_t event, void * data)
>
>       if (ref->check_timer) {
>               api->timer_delete(ref->check_timer);
> +             ref->check_timer = NULL;
>       }
>
>       log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!",
> @@ -359,10 +410,10 @@ static void wd_resource_failed (struct cs_fsm* fsm, 
> int32_t event, void * data)
>               watchdog_ok = 0;
>       }
>       else if (strcmp (ref->recovery, "reboot") == 0) {
> -             //reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, 
> LINUX_REBOOT_CMD_RESTART, NULL);
> +             reboot(RB_AUTOBOOT);
>       }
>       else if (strcmp (ref->recovery, "shutdown") == 0) {
> -             //reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, 
> LINUX_REBOOT_CMD_POWER_OFF, NULL);
> +             reboot(RB_POWER_OFF);
>       }
>       cs_fsm_state_set(fsm, WD_S_FAILED, data);
>   }
> @@ -377,13 +428,10 @@ static void wd_key_changed(object_change_type_t 
> change_type,
>   {
>       struct resource* ref = (struct resource*)priv_data_pt;
>
> -     if (strcmp(key_name_pt, "last_updated") == 0 ||
> -             strcmp(key_name_pt, "current") == 0) {
> +     if (strncmp(key_name_pt, "last_updated", key_len) == 0 ||
> +             strncmp(key_name_pt, "current", key_len) == 0) {
>               return;
>       }
> -//   log_printf (LOGSYS_LEVEL_WARNING,
> -//           "watchdog resource key changed: %s.%s=%s ref=%p.",
> -//           (char*)object_name_pt, (char*)key_name_pt, (char*)key_value_pt, 
> ref);
>
>       if (ref == NULL) {
>               return;
> @@ -398,13 +446,14 @@ static void wd_object_destroyed(
>   {
>       struct resource* ref = (struct resource*)priv_data_pt;
>
> -     log_printf (LOGSYS_LEVEL_WARNING,
> -                     "watchdog resource \"%s\" deleted from objdb!",
> -                     (char*)name_pt);
> -
>       if (ref) {
> +             log_printf (LOGSYS_LEVEL_WARNING,
> +                     "resource \"%s\" deleted from objdb!",
> +                     ref->name);
> +
>               api->timer_delete(ref->check_timer);
>               ref->check_timer = NULL;
> +             free(ref);
>       }
>   }
>
> @@ -412,33 +461,31 @@ static void wd_resource_check_fn (void* resource_ref)
>   {
>       struct resource* ref = (struct resource*)resource_ref;
>
> -     log_printf (LOGSYS_LEVEL_INFO,
> -                     "checking watchdog resource \"%s\".",
> -                     ref->name);
> -     if (wd_resource_has_failed (ref) ) {
> +     if (wd_resource_state_is_ok (ref) == CS_FALSE) {
>               cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref);
> -             log_printf (LOGSYS_LEVEL_CRIT,
> -                     "watchdog resource \"%s\" failed!",
> -                     (char*)ref->name);
>               return;
>       }
> -     api->timer_add_duration((unsigned long 
> long)ref->check_timeout*1000000000,
> +     api->timer_add_duration(ref->check_timeout*MILLI_2_NANO_SECONDS,
>               ref, wd_resource_check_fn,&ref->check_timer);
>   }
>
> -
> -static void wd_resource_create (hdb_handle_t resource_obj)
> +/*
> + * return 0   - fully configured
> + * return -1  - partially configured
> + */
> +static int32_t wd_resource_create (hdb_handle_t resource_obj)
>   {
>       int res;
>       size_t len;
>       char *state;
>       objdb_value_types_t type;
> -     char mon_period_str[32];
> -     int32_t tmp_value;
> +     char period_str[32];
> +     char *str;
> +     uint64_t tmp_value;
>       struct resource *ref = malloc (sizeof (struct resource));
>
>       ref->handle = resource_obj;
> -     ref->check_timeout = WD_DEFAULT_TIMEOUT;
> +     ref->check_timeout = WD_DEFAULT_TIMEOUT_MS;
>       ref->check_timer = NULL;
>       api->object_name_get (resource_obj,
>               ref->name,
> @@ -448,30 +495,33 @@ static void wd_resource_create (hdb_handle_t 
> resource_obj)
>       ref->fsm.table = wd_fsm_table;
>       ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry);
>       ref->fsm.curr_entry = 0;
> -     ref->fsm.curr_state = WD_S_DISABLED;
> +     ref->fsm.curr_state = WD_S_STOPPED;
>       ref->fsm.state_to_str = wd_res_state_to_str;
>       ref->fsm.event_to_str = wd_res_event_to_str;
>       api->object_priv_set (resource_obj, NULL);
>
>       res = api->object_key_get_typed (resource_obj,
>                       "poll_period",
> -                     (void**)&mon_period_str,&len,
> +                     (void**)&str,&len,
>                       &type);
>       if (res != 0) {
> -             log_printf (LOGSYS_LEVEL_ERROR, "%s : %d",__func__, res);
> -             len = snprintf (mon_period_str, 32, "%d", ref->check_timeout);
> +             len = snprintf (period_str, 32, "%"PRIu64"", 
> ref->check_timeout);
>               api->object_key_create_typed (resource_obj,
> -                     "poll_period",&mon_period_str,
> +                     "poll_period",&period_str,
>                       len,
>                       OBJDB_VALUETYPE_STRING);
>       }
>       else {
> -             tmp_value = strtol (mon_period_str, NULL, 0);
> -             if (tmp_value>  0&&  tmp_value<  120)
> -                     ref->check_timeout = (tmp_value * 5)/4;
> +             if (str_to_uint64_t(str,&tmp_value, WD_MIN_TIMEOUT_MS, 
> WD_MAX_TIMEOUT_MS) == CS_OK) {
> +                     ref->check_timeout = tmp_value;
> +             } else {
> +                     log_printf (LOGSYS_LEVEL_WARNING,
> +                             "Could NOT use poll_period:%s ms for resource 
> %s",
> +                             str, ref->name);
> +             }
>       }
>
> -     api->object_track_start (resource_obj, OBJECT_TRACK_DEPTH_ONE,
> +     api->object_track_start (resource_obj, OBJECT_TRACK_DEPTH_RECURSIVE,
>                       wd_key_changed, NULL, wd_object_destroyed,
>                       NULL, ref);
>
> @@ -482,7 +532,7 @@ static void wd_resource_create (hdb_handle_t resource_obj)
>                */
>               log_printf (LOGSYS_LEVEL_WARNING,
>                       "resource %s missing a recovery key.", ref->name);
> -             return;
> +             return -1;
>       }
>       res = api->object_key_get_typed (resource_obj,
>               "state", (void*)&state,&len,&type);
> @@ -491,7 +541,7 @@ static void wd_resource_create (hdb_handle_t resource_obj)
>               */
>               log_printf (LOGSYS_LEVEL_WARNING,
>                       "resource %s missing a state key.", ref->name);
> -             return;
> +             return -1;
>       }
>
>       res = api->object_key_get_typed (resource_obj,
> @@ -502,11 +552,16 @@ static void wd_resource_create (hdb_handle_t 
> resource_obj)
>               ref->last_updated = 0;
>       }
>
> -     api->timer_add_duration((unsigned long 
> long)ref->check_timeout*1000000000,
> +     /*
> +      * delay the first check to give the monitor time to start working.
> +      */
> +     tmp_value = CS_MAX(ref->check_timeout * 2, WD_DEFAULT_TIMEOUT_MS);
> +     api->timer_add_duration(tmp_value * MILLI_2_NANO_SECONDS,
>               ref,
>               wd_resource_check_fn,&ref->check_timer);
>
> -     cs_fsm_state_set(&ref->fsm, WD_S_GOOD, ref);
> +     cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref);
> +     return 0;
>   }
>
>
> @@ -515,15 +570,16 @@ static void wd_tickle_fn (void* arg)
>       ENTER();
>
>       if (watchdog_ok) {
> -             if (dog>  0)
> +             if (dog>  0) {
>                       ioctl(dog, WDIOC_KEEPALIVE,&watchdog_ok);
> +             }
> +             api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, 
> NULL,
> +                     wd_tickle_fn,&wd_timer);
>       }
>       else {
>               log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!");
>       }
>
> -     api->timer_add_duration((unsigned long long)tickle_timeout*1000000000, 
> NULL,
> -                             wd_tickle_fn,&wd_timer);
>   }
>
>   static void wd_resource_object_created(hdb_handle_t parent_object_handle,
> @@ -540,7 +596,7 @@ static void wd_scan_resources (void)
>       hdb_handle_t obj_finder2;
>       hdb_handle_t resource_type;
>       hdb_handle_t resource;
> -     int res;
> +     int res_count = 0;
>
>       ENTER();
>
> @@ -549,12 +605,8 @@ static void wd_scan_resources (void)
>               "resources", strlen ("resources"),
>               &obj_finder);
>
> -     res = api->object_find_next (obj_finder,&resources_obj);
> +     api->object_find_next (obj_finder,&resources_obj);
>       api->object_find_destroy (obj_finder);
> -     if (res != 0) {
> -             log_printf (LOGSYS_LEVEL_INFO, "no resources.");
> -             return;
> -     }
>
>       /* this will be the system or process level
>        */
> @@ -573,7 +625,9 @@ static void wd_scan_resources (void)
>               while (api->object_find_next (obj_finder2,
>                               &resource) == 0) {
>
> -                     wd_resource_create (resource);
> +                     if (wd_resource_create (resource) == 0) {
> +                             res_count++;
> +                     }
>               }
>               api->object_find_destroy (obj_finder2);
>
> @@ -582,23 +636,23 @@ static void wd_scan_resources (void)
>                       NULL, NULL);
>       }
>       api->object_find_destroy (obj_finder);
> +     if (res_count == 0) {
> +             log_printf (LOGSYS_LEVEL_INFO, "no resources configured.");
> +     }
>   }
>
>
>   static void watchdog_timeout_apply (uint32_t new)
>   {
>       struct watchdog_info ident;
> +     uint32_t original_timeout = watchdog_timeout;
>
> -     if (new<  2) {
> -             watchdog_timeout = 2;
> -     }
> -     else if (new>  120) {
> -             watchdog_timeout = 120;
> -     }
> -     else {
> -             watchdog_timeout = new;
> +     if (new == original_timeout) {
> +             return;
>       }
>
> +     watchdog_timeout = new;
> +
>       if (dog>  0) {
>               ioctl(dog, WDIOC_GETSUPPORT,&ident);
>               if (ident.options&  WDIOF_SETTIMEOUT) {
> @@ -608,10 +662,24 @@ static void watchdog_timeout_apply (uint32_t new)
>               }
>               ioctl(dog, WDIOC_GETTIMEOUT,&watchdog_timeout);
>       }
> -     tickle_timeout = watchdog_timeout / 2;
>
> -     log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds\n", 
> watchdog_timeout);
> -     log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %d seconds\n", 
> tickle_timeout);
> +     if (watchdog_timeout == new) {
> +             tickle_timeout = (watchdog_timeout * CS_TIME_MS_IN_SEC)/ 2;
> +
> +             /* reset the tickle timer in case it was reduced.
> +              */
> +             api->timer_delete (wd_timer);
> +             api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, 
> NULL,
> +                     wd_tickle_fn,&wd_timer);
> +
> +             log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d 
> seconds\n", watchdog_timeout);
> +             log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is 
> %"PRIu64" ms\n", tickle_timeout);
> +     } else {
> +             log_printf (LOGSYS_LEVEL_WARNING,
> +                     "Could not change the Watchdog timeout from %d to %d 
> seconds\n",
> +                     original_timeout, new);
> +     }
> +
>   }
>
>   static int setup_watchdog(void)
> @@ -658,28 +726,30 @@ static void 
> wd_top_level_key_changed(object_change_type_t change_type,
>       const void *key_value_pt, size_t key_value_len,
>       void *priv_data_pt)
>   {
> -     uint32_t tmp_value;
> +     uint64_t tmp_value;
> +     int32_t tmp_value_32;
>
>       ENTER();
>       if (change_type != OBJECT_KEY_DELETED&&
>               strncmp ((char*)key_name_pt, "watchdog_timeout", key_value_len) 
> == 0) {
> -             tmp_value = strtol (key_value_pt, NULL, 0);
> -             watchdog_timeout_apply (tmp_value);
> +             if (str_to_uint64_t(key_value_pt,&tmp_value, 2, 120) == CS_OK) {
> +                     tmp_value_32 = tmp_value;
> +                     watchdog_timeout_apply (tmp_value_32);
> +             }
>       }
>       else {
> -             watchdog_timeout_apply (WD_DEFAULT_TIMEOUT);
> +             watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
>       }
> -     log_printf (LOGSYS_LEVEL_INFO, "new(%d) tickle_timeout: %d", 
> change_type, tickle_timeout);
>   }
>
> -
>   static void watchdog_timeout_get_initial (void)
>   {
>       int32_t res;
>       char watchdog_timeout_str[32];
>       size_t watchdog_timeout_len;
>       objdb_value_types_t watchdog_timeout_type;
> -     uint32_t tmp_value;
> +     uint32_t tmp_value_32;
> +     uint64_t tmp_value;
>
>       ENTER();
>
> @@ -688,7 +758,7 @@ static void watchdog_timeout_get_initial (void)
>                       (void**)&watchdog_timeout_str,&watchdog_timeout_len,
>                       &watchdog_timeout_type);
>       if (res != 0) {
> -             watchdog_timeout_apply (WD_DEFAULT_TIMEOUT);
> +             watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
>
>               watchdog_timeout_len = snprintf (watchdog_timeout_str, 32, 
> "%d", watchdog_timeout);
>               api->object_key_create_typed (resources_obj,
> @@ -697,8 +767,12 @@ static void watchdog_timeout_get_initial (void)
>                       OBJDB_VALUETYPE_STRING);
>       }
>       else {
> -             tmp_value = strtol (watchdog_timeout_str, NULL, 0);
> -             watchdog_timeout_apply (tmp_value);
> +             if (str_to_uint64_t(watchdog_timeout_str,&tmp_value, 2, 120) == 
> CS_OK) {
> +                     tmp_value_32 = tmp_value;
> +                     watchdog_timeout_apply (tmp_value_32);
> +             } else {
> +                     watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
> +             }
>       }
>
>       api->object_track_start (resources_obj, OBJECT_TRACK_DEPTH_ONE,
> @@ -734,7 +808,7 @@ static int wd_exec_init_fn (
>
>       wd_scan_resources();
>
> -     api->timer_add_duration((unsigned long long)tickle_timeout*1000000000, 
> NULL,
> +     api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
>                               wd_tickle_fn,&wd_timer);
>
>       return 0;
> diff --git a/test/testsam.c b/test/testsam.c
> index 1972d9e..d29605a 100644
> --- a/test/testsam.c
> +++ b/test/testsam.c
> @@ -876,6 +876,7 @@ static int test8 (pid_t pid, pid_t old_pid, int test_n) {
>       hdb_handle_t res_handle, proc_handle, pid_handle;
>       size_t value_len;
>       uint64_t tstamp1, tstamp2;
> +     int32_t msec_diff;
>       char key_value[256];
>       unsigned int instance_id;
>       char tmp_obj[PATH_MAX];
> @@ -1008,8 +1009,8 @@ static int test8 (pid_t pid, pid_t old_pid, int test_n) 
> {
>                       return (2);
>               }
>
> -             if (value_len != strlen ("registered") || memcmp (key_value, 
> "registered", value_len) != 0) {
> -                     printf ("State key is not \"registered\".\n");
> +             if (value_len != strlen ("stopped") || memcmp (key_value, 
> "stopped", value_len) != 0) {
> +                     printf ("State key is not \"stopped\".\n");
>                       return (2);
>               }
>
> @@ -1026,8 +1027,8 @@ static int test8 (pid_t pid, pid_t old_pid, int test_n) 
> {
>                       return (2);
>               }
>
> -             if (value_len != strlen ("started") || memcmp (key_value, 
> "started", value_len) != 0) {
> -                     printf ("State key is not \"started\".\n");
> +             if (value_len != strlen ("running") || memcmp (key_value, 
> "running", value_len) != 0) {
> +                     printf ("State key is not \"running\".\n");
>                       return (2);
>               }
>
> @@ -1044,8 +1045,8 @@ static int test8 (pid_t pid, pid_t old_pid, int test_n) 
> {
>                       return (2);
>               }
>
> -             if (value_len != strlen ("registered") || memcmp (key_value, 
> "registered", value_len) != 0) {
> -                     printf ("State key is not \"registered\".\n");
> +             if (value_len != strlen ("stopped") || memcmp (key_value, 
> "stopped", value_len) != 0) {
> +                     printf ("State key is not \"stopped\".\n");
>                       return (2);
>               }
>
> @@ -1058,8 +1059,8 @@ static int test8 (pid_t pid, pid_t old_pid, int test_n) 
> {
>                       return (2);
>               }
>
> -             if (value_len != strlen ("registered") || memcmp (key_value, 
> "registered", value_len) != 0) {
> -                     printf ("State key is not \"registered\".\n");
> +             if (value_len != strlen ("stopped") || memcmp (key_value, 
> "stopped", value_len) != 0) {
> +                     printf ("State key is not \"stopped\".\n");
>                       return (2);
>               }
>
> @@ -1076,8 +1077,8 @@ static int test8 (pid_t pid, pid_t old_pid, int test_n) 
> {
>                       return (2);
>               }
>
> -             if (value_len != strlen ("started") || memcmp (key_value, 
> "started", value_len) != 0) {
> -                     printf ("State key is not \"started\".\n");
> +             if (value_len != strlen ("running") || memcmp (key_value, 
> "running", value_len) != 0) {
> +                     printf ("State key is not \"running\".\n");
>                       return (2);
>               }
>
> @@ -1093,7 +1094,7 @@ static int test8 (pid_t pid, pid_t old_pid, int test_n) 
> {
>                               fprintf (stderr, "Can't send hc. Error %d\n", 
> err);
>                               return 2;
>                       }
> -                     err = confdb_key_get_typed (cdb_handle, pid_handle, 
> "hc_last",&tstamp1,&value_len,&cdbtype);
> +                     err = confdb_key_get_typed (cdb_handle, pid_handle, 
> "last_updated",&tstamp1,&value_len,&cdbtype);
>                       if (err != CS_OK) {
>                               printf ("Could not get \"state\" key: %d.\n", 
> err);
>                               return (2);
> @@ -1106,13 +1107,15 @@ static int test8 (pid_t pid, pid_t old_pid, int 
> test_n) {
>                               return 2;
>                       }
>                       sleep (1);
> -                     err = confdb_key_get_typed (cdb_handle, pid_handle, 
> "hc_last",&tstamp2,&value_len,&cdbtype);
> +                     err = confdb_key_get_typed (cdb_handle, pid_handle, 
> "last_updated",&tstamp2,&value_len,&cdbtype);
>                       if (err != CS_OK) {
>                               printf ("Could not get \"state\" key: %d.\n", 
> err);
>                               return (2);
>                       }
> -                     if (tstamp2 - tstamp1<  500 || tstamp2 - tstamp1>  
> 2000) {
> -                             printf ("Difference %d is not within<500, 2000> 
>  interval.\n", (int)(tstamp2 - tstamp1));
> +                     msec_diff = (tstamp2 - tstamp1)/CS_TIME_NS_IN_MSEC;
> +
> +                     if (msec_diff<  500 || msec_diff>  2000) {
> +                             printf ("Difference %d is not within<500, 2000> 
>  interval.\n", msec_diff);
>                               return (2);
>                       }
>
> @@ -1129,8 +1132,8 @@ static int test8 (pid_t pid, pid_t old_pid, int test_n) 
> {
>                               return (2);
>                       }
>
> -                     if (value_len != strlen ("registered") || memcmp 
> (key_value, "registered", value_len) != 0) {
> -                             printf ("State key is not \"registered\".\n");
> +                     if (value_len != strlen ("stopped") || memcmp 
> (key_value, "stopped", value_len) != 0) {
> +                             printf ("State key is not \"stopped\".\n");
>                               return (2);
>                       }
>
> @@ -1297,8 +1300,8 @@ static int test9 (pid_t pid, pid_t old_pid, int test_n) 
> {
>                               return (2);
>                       }
>
> -                     if (value_len != strlen ("registered") || memcmp 
> (key_value, "registered", value_len) != 0) {
> -                             printf ("State key is not \"registered\".\n");
> +                     if (value_len != strlen ("stopped") || memcmp 
> (key_value, "stopped", value_len) != 0) {
> +                             printf ("State key is not \"stopped\".\n");
>                               return (2);
>                       }
>
> @@ -1315,8 +1318,8 @@ static int test9 (pid_t pid, pid_t old_pid, int test_n) 
> {
>                               return (2);
>                       }
>
> -                     if (value_len != strlen ("started") || memcmp 
> (key_value, "started", value_len) != 0) {
> -                             printf ("State key is not \"started\".\n");
> +                     if (value_len != strlen ("running") || memcmp 
> (key_value, "running", value_len) != 0) {
> +                             printf ("State key is not \"running\".\n");
>                               return (2);
>                       }
>

_______________________________________________
Openais mailing list
Openais@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to