From: Jan Friesse <jfrie...@redhat.com> This patch adds support for Confdb integration with SAM. It is now possible to use SAM_RECOVERY_POLICY_CONFDB as a flag in combination with the previous policies.
Also new function sam_mark_failed is added for ability to use RECOVERY policy together with confdb and get expected results (specially with integration with corosync watchdog) Signed-off-by: Angus Salkeld <asalk...@redhat.com> Signed-off-by: Jan Friesse <jfrie...@redhat.com> Signed-off-by: Angus Salkeld <asalk...@redhat.com> --- cts/agents/Makefile.am | 2 +- include/corosync/sam.h | 13 + lib/Makefile.am | 2 +- lib/libsam.verso | 2 +- lib/sam.c | 508 +++++++++++++++++++++++++++++++++----- man/Makefile.am | 1 + man/sam_initialize.3 | 6 +- man/sam_mark_failed.3 | 73 ++++++ man/sam_overview.8 | 35 +++- test/testsam.c | 643 +++++++++++++++++++++++++++++++++++++++++++++++- 10 files changed, 1212 insertions(+), 73 deletions(-) create mode 100644 man/sam_mark_failed.3 diff --git a/cts/agents/Makefile.am b/cts/agents/Makefile.am index c1a7f85..1d73a9b 100644 --- a/cts/agents/Makefile.am +++ b/cts/agents/Makefile.am @@ -66,7 +66,7 @@ confdb_test_agent_LDADD = -lconfdb -lcoroipcc ../../exec/coropoll.o confdb_test_agent_LDFLAGS = -L../../lib sam_test_agent_SOURCES = sam_test_agent.c common_test_agent.c -sam_test_agent_LDADD = -lsam -lquorum -lcoroipcc ../../exec/coropoll.o +sam_test_agent_LDADD = -lsam -lquorum -lcoroipcc -lconfdb ../../exec/coropoll.o sam_test_agent_LDFLAGS = -L../../lib votequorum_test_agent_SOURCES = votequorum_test_agent.c common_test_agent.c diff --git a/include/corosync/sam.h b/include/corosync/sam.h index 41727c2..30401a0 100644 --- a/include/corosync/sam.h +++ b/include/corosync/sam.h @@ -46,6 +46,7 @@ typedef enum { SAM_RECOVERY_POLICY_QUORUM = 0x08, SAM_RECOVERY_POLICY_QUORUM_QUIT = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_QUIT, SAM_RECOVERY_POLICY_QUORUM_RESTART = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_RESTART, + SAM_RECOVERY_POLICY_CONFDB = 0x10, } sam_recovery_policy_t; /* @@ -205,6 +206,18 @@ cs_error_t sam_data_store ( const void *data, size_t size); +/* + * Marks child as failed. 
This can be called only with SAM_RECOVERY_POLICY_CONFDB flag set and + * makes sense only for SAM_RECOVERY_POLICY_RESTART. This will kill child without sending warn + * signal. Confdb state key will be set to failed. + * + * - CS_OK in case no problem appeared + * - CS_ERR_BAD_HANDLE library was not initialized or was already finalized + * - CS_ERR_INVALID_PARAM recovery policy doesn't has SAM_RECOVERY_POLICY_CONFDB flag set + * - CS_ERR_LIBRARY if some internal error appeared (communication with parent + * process) + */ +cs_error_t sam_mark_failed (void); #ifdef __cplusplus } diff --git a/lib/Makefile.am b/lib/Makefile.am index 7aca53b..c4ee3c0 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -62,7 +62,7 @@ libvotequorum_a_SOURCES = votequorum.c libconfdb_a_SOURCES = confdb.c sa-confdb.c libconfdb_a_LIBADD = ../lcr/lcr_ifact.o CONFDB_LINKER_ADD = $(OS_DYFLAGS) $(OS_LDL) -SAM_LINKER_ADD = -L. -lquorum +SAM_LINKER_ADD = -L. -lquorum -lconfdb libcoroipcc_a_SOURCES = coroipcc.c libsam_a_SOURCES = sam.c diff --git a/lib/libsam.verso b/lib/libsam.verso index 8089590..fdc6698 100644 --- a/lib/libsam.verso +++ b/lib/libsam.verso @@ -1 +1 @@ -4.3.0 +4.4.0 diff --git a/lib/sam.c b/lib/sam.c index a3d1cd0..53020ac 100644 --- a/lib/sam.c +++ b/lib/sam.c @@ -42,6 +42,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <sys/time.h> #include <sys/types.h> #include <sys/socket.h> #include <errno.h> @@ -50,6 +51,7 @@ #include <corosync/coroipc_types.h> #include <corosync/coroipcc.h> #include <corosync/corodefs.h> +#include <corosync/confdb.h> #include <corosync/hdb.h> #include <corosync/quorum.h> @@ -61,6 +63,15 @@ #include <sys/wait.h> #include <signal.h> +#define SAM_CONFDB_S_FAILED "failed" +#define SAM_CONFDB_S_REGISTERED "registered" +#define SAM_CONFDB_S_STARTED "started" +#define SAM_CONFDB_S_Q_WAIT "waiting for quorum" + +#define SAM_RP_MASK_Q(pol) (pol & (~SAM_RECOVERY_POLICY_QUORUM)) +#define SAM_RP_MASK_C(pol) (pol & 
(~SAM_RECOVERY_POLICY_CONFDB)) +#define SAM_RP_MASK(pol) (pol & (~(SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_CONFDB))) + enum sam_internal_status_t { SAM_INTERNAL_STATUS_NOT_INITIALIZED = 0, SAM_INTERNAL_STATUS_INITIALIZED, @@ -75,6 +86,7 @@ enum sam_command_t { SAM_COMMAND_HB, SAM_COMMAND_DATA_STORE, SAM_COMMAND_WARN_SIGNAL_SET, + SAM_COMMAND_MARK_FAILED, }; enum sam_reply_t { @@ -89,6 +101,13 @@ enum sam_parent_action_t { SAM_PARENT_ACTION_CONTINUE }; +enum sam_confdb_key_t { + SAM_CONFDB_KEY_RECOVERY, + SAM_CONFDB_KEY_HC_PERIOD, + SAM_CONFDB_KEY_LAST_HC, + SAM_CONFDB_KEY_STATE, +}; + static struct { int time_interval; sam_recovery_policy_t recovery_policy; @@ -109,11 +128,156 @@ static struct { size_t user_data_size; size_t user_data_allocated; + pthread_mutex_t lock; + quorum_handle_t quorum_handle; uint32_t quorate; int quorum_fd; + + confdb_handle_t confdb_handle; + hdb_handle_t confdb_pid_handle; } sam_internal_data; +extern const char *__progname; + +static cs_error_t sam_confdb_update_key (enum sam_confdb_key_t key, const char *value) +{ + cs_error_t err; + const char *svalue; + uint64_t hc_period, last_hc; + struct timeval tv; + const char *ssvalue[] = { [SAM_RECOVERY_POLICY_QUIT] = "quit", [SAM_RECOVERY_POLICY_RESTART] = "restart" }; + + switch (key) { + case SAM_CONFDB_KEY_RECOVERY: + svalue = ssvalue[SAM_RP_MASK (sam_internal_data.recovery_policy)]; + + if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle, + "recovery", svalue, strlen ((const char *)svalue), CONFDB_VALUETYPE_STRING)) != CS_OK) { + goto exit_error; + } + break; + case SAM_CONFDB_KEY_HC_PERIOD: + hc_period = sam_internal_data.time_interval; + + if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle, + "hc_period", &hc_period, sizeof (uint64_t), CONFDB_VALUETYPE_UINT64)) != CS_OK) { + goto exit_error; + } + break; + case SAM_CONFDB_KEY_LAST_HC: + if (gettimeofday (&tv, NULL) == -1) 
{ + last_hc = 0; + } else { + last_hc = ((uint64_t)tv.tv_sec * 1000) + ((uint64_t)tv.tv_usec / 1000); + } + + if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle, + "hc_last", &last_hc, sizeof (uint64_t), CONFDB_VALUETYPE_UINT64)) != CS_OK) { + goto exit_error; + } + break; + case SAM_CONFDB_KEY_STATE: + svalue = value; + if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle, + "state", svalue, strlen ((const char *)svalue), CONFDB_VALUETYPE_STRING)) != CS_OK) { + goto exit_error; + } + break; + } + + return (CS_OK); + +exit_error: + return (err); +} + +static cs_error_t sam_confdb_destroy_pid_obj (void) +{ + return (confdb_object_destroy (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle)); +} + +static cs_error_t sam_confdb_register (void) +{ + const char *obj_name; + cs_error_t err; + confdb_handle_t confdb_handle; + hdb_handle_t resource_handle, process_handle, pid_handle, obj_handle; + hdb_handle_t *res_handle; + char tmp_obj[PATH_MAX]; + int i; + + if ((err = confdb_initialize (&confdb_handle, NULL)) != CS_OK) { + return (err); + } + + for (i = 0; i < 3; i++) { + switch (i) { + case 0: + obj_name = "resources"; + obj_handle = OBJECT_PARENT_HANDLE; + res_handle = &resource_handle; + break; + case 1: + obj_name = "process"; + obj_handle = resource_handle; + res_handle = &process_handle; + break; + case 2: + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, getpid ()) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", getpid ()); + } + + obj_name = tmp_obj; + obj_handle = process_handle; + res_handle = &pid_handle; + break; + } + + if ((err = confdb_object_find_start (confdb_handle, obj_handle)) != CS_OK) { + goto finalize_error; + } + + if ((err = confdb_object_find (confdb_handle, obj_handle, obj_name, strlen (obj_name), + res_handle)) != CS_OK) { + if (err == CONFDB_ERR_ACCESS) { + /* + * Try to create object 
+ */ + if ((err = confdb_object_create (confdb_handle, obj_handle, obj_name, + strlen (obj_name), res_handle)) != CS_OK) { + goto finalize_error; + } + } else { + goto finalize_error; + } + } else { + if ((err = confdb_object_find_destroy (confdb_handle, obj_handle)) != CS_OK) { + goto finalize_error; + } + } + } + + sam_internal_data.confdb_pid_handle = pid_handle; + sam_internal_data.confdb_handle = confdb_handle; + + if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_RECOVERY, NULL)) != CS_OK) { + goto destroy_finalize_error; + } + + if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_HC_PERIOD, NULL)) != CS_OK) { + goto destroy_finalize_error; + } + + return (CS_OK); + +destroy_finalize_error: + sam_confdb_destroy_pid_obj (); +finalize_error: + confdb_finalize (confdb_handle); + return (err); +} + static void quorum_notification_fn ( quorum_handle_t handle, uint32_t quorate, @@ -135,8 +299,8 @@ cs_error_t sam_initialize ( return (CS_ERR_BAD_HANDLE); } - if (recovery_policy != SAM_RECOVERY_POLICY_QUIT && recovery_policy != SAM_RECOVERY_POLICY_RESTART && - recovery_policy != SAM_RECOVERY_POLICY_QUORUM_QUIT && recovery_policy != SAM_RECOVERY_POLICY_QUORUM_RESTART) { + if (SAM_RP_MASK (recovery_policy) != SAM_RECOVERY_POLICY_QUIT && + SAM_RP_MASK (recovery_policy) != SAM_RECOVERY_POLICY_RESTART) { return (CS_ERR_INVALID_PARAM); } @@ -178,6 +342,8 @@ cs_error_t sam_initialize ( sam_internal_data.user_data_size = 0; sam_internal_data.user_data_allocated = 0; + pthread_mutex_init (&sam_internal_data.lock, NULL); + return (CS_OK); exit_error_quorum: @@ -290,8 +456,12 @@ cs_error_t sam_data_getsize (size_t *size) return (CS_ERR_BAD_HANDLE); } + pthread_mutex_lock (&sam_internal_data.lock); + *size = sam_internal_data.user_data_size; + pthread_mutex_unlock (&sam_internal_data.lock); + return (CS_OK); } @@ -299,6 +469,10 @@ cs_error_t sam_data_restore ( void *data, size_t size) { + cs_error_t err; + + err = CS_OK; + if (data == NULL) { return (CS_ERR_INVALID_PARAM); } @@ 
-310,17 +484,30 @@ cs_error_t sam_data_restore ( return (CS_ERR_BAD_HANDLE); } + pthread_mutex_lock (&sam_internal_data.lock); + if (sam_internal_data.user_data_size == 0) { - return (CS_OK); + err = CS_OK; + + goto error_unlock; } if (size < sam_internal_data.user_data_size) { - return (CS_ERR_INVALID_PARAM); + err = CS_ERR_INVALID_PARAM; + + goto error_unlock; } memcpy (data, sam_internal_data.user_data, sam_internal_data.user_data_size); + pthread_mutex_unlock (&sam_internal_data.lock); + return (CS_OK); + +error_unlock: + pthread_mutex_unlock (&sam_internal_data.lock); + + return (err); } cs_error_t sam_data_store ( @@ -343,28 +530,36 @@ cs_error_t sam_data_store ( size = 0; } + pthread_mutex_lock (&sam_internal_data.lock); + if (sam_internal_data.am_i_child) { /* * We are child so we must send data to parent */ command = SAM_COMMAND_DATA_STORE; if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) { - return (CS_ERR_LIBRARY); + err = CS_ERR_LIBRARY; + + goto error_unlock; } if (sam_safe_write (sam_internal_data.child_fd_out, &size, sizeof (size)) != sizeof (size)) { - return (CS_ERR_LIBRARY); + err = CS_ERR_LIBRARY; + + goto error_unlock; } if (data != NULL && sam_safe_write (sam_internal_data.child_fd_out, data, size) != size) { - return (CS_ERR_LIBRARY); + err = CS_ERR_LIBRARY; + + goto error_unlock; } /* * And wait for reply */ if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) { - return (err); + goto error_unlock; } } @@ -379,7 +574,9 @@ cs_error_t sam_data_store ( } else { if (sam_internal_data.user_data_allocated < size) { if ((new_data = realloc (sam_internal_data.user_data, size)) == NULL) { - return (CS_ERR_NO_MEMORY); + err = CS_ERR_NO_MEMORY; + + goto error_unlock; } sam_internal_data.user_data_allocated = size; @@ -392,30 +589,53 @@ cs_error_t sam_data_store ( memcpy (sam_internal_data.user_data, data, size); } + pthread_mutex_unlock (&sam_internal_data.lock); + return (CS_OK); + 
+error_unlock: + pthread_mutex_unlock (&sam_internal_data.lock); + + return (err); } cs_error_t sam_start (void) { char command; cs_error_t err; + sam_recovery_policy_t recpol; if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED) { return (CS_ERR_BAD_HANDLE); } + recpol = sam_internal_data.recovery_policy; + + if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) { + pthread_mutex_lock (&sam_internal_data.lock); + } + command = SAM_COMMAND_START; - if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) + if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) { + if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) { + pthread_mutex_unlock (&sam_internal_data.lock); + } + return (CS_ERR_LIBRARY); + } - if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) { + if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) { /* * Wait for parent reply */ if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) { + pthread_mutex_unlock (&sam_internal_data.lock); + return (err); } + + pthread_mutex_unlock (&sam_internal_data.lock); } if (sam_internal_data.hc_callback) @@ -430,6 +650,7 @@ cs_error_t sam_start (void) cs_error_t sam_stop (void) { char command; + cs_error_t err; if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) { return (CS_ERR_BAD_HANDLE); @@ -437,8 +658,30 @@ cs_error_t sam_stop (void) command = SAM_COMMAND_STOP; - if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) + if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) { + pthread_mutex_lock (&sam_internal_data.lock); + } + + if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) { + if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) { + 
pthread_mutex_unlock (&sam_internal_data.lock); + } + return (CS_ERR_LIBRARY); + } + + if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) { + /* + * Wait for parent reply + */ + if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) { + pthread_mutex_unlock (&sam_internal_data.lock); + + return (err); + } + + pthread_mutex_unlock (&sam_internal_data.lock); + } if (sam_internal_data.hc_callback) if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, sizeof (command)) != sizeof (command)) @@ -489,6 +732,26 @@ exit_error: return (CS_OK); } +cs_error_t sam_mark_failed (void) +{ + char command; + + if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED && + sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED) { + return (CS_ERR_BAD_HANDLE); + } + + if (!(sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB)) { + return (CS_ERR_INVALID_PARAM); + } + + command = SAM_COMMAND_MARK_FAILED; + + if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) + return (CS_ERR_LIBRARY); + + return (CS_OK); +} cs_error_t sam_warn_signal_set (int warn_signal) { @@ -501,25 +764,31 @@ cs_error_t sam_warn_signal_set (int warn_signal) return (CS_ERR_BAD_HANDLE); } + pthread_mutex_lock (&sam_internal_data.lock); + if (sam_internal_data.am_i_child) { /* * We are child so we must send data to parent */ command = SAM_COMMAND_WARN_SIGNAL_SET; if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) { - return (CS_ERR_LIBRARY); + err = CS_ERR_LIBRARY; + + goto error_unlock; } if (sam_safe_write (sam_internal_data.child_fd_out, &warn_signal, sizeof (warn_signal)) != sizeof (warn_signal)) { - return (CS_ERR_LIBRARY); + err = CS_ERR_LIBRARY; + + goto error_unlock; } /* * And wait for reply */ if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) { - return (err); + goto error_unlock; } } @@ -528,14 +797,51 @@ cs_error_t 
sam_warn_signal_set (int warn_signal) */ sam_internal_data.warn_signal = warn_signal; + pthread_mutex_unlock (&sam_internal_data.lock); + return (CS_OK); + +error_unlock: + pthread_mutex_unlock (&sam_internal_data.lock); + + return (err); } -static cs_error_t sam_parent_warn_signal_set ( +static cs_error_t sam_parent_reply_send ( + cs_error_t err, int parent_fd_in, int parent_fd_out) { char reply; + + if (err == CS_OK) { + reply = SAM_REPLY_OK; + + if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) { + err = CS_ERR_LIBRARY; + goto error_reply; + } + + return (CS_OK); + } + +error_reply: + reply = SAM_REPLY_ERROR; + if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) { + return (CS_ERR_LIBRARY); + } + if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) { + return (CS_ERR_LIBRARY); + } + + return (err); +} + + +static cs_error_t sam_parent_warn_signal_set ( + int parent_fd_in, + int parent_fd_out) +{ char *user_data; int warn_signal; cs_error_t err; @@ -553,35 +859,27 @@ static cs_error_t sam_parent_warn_signal_set ( goto error_reply; } - reply = SAM_REPLY_OK; - if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) { - err = CS_ERR_LIBRARY; - goto error_reply; - } - return (CS_OK); + return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out)); error_reply: - reply = SAM_REPLY_ERROR; - if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) { - return (CS_ERR_LIBRARY); - } - if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) { - return (CS_ERR_LIBRARY); - } - - return (err); + return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out)); } static cs_error_t sam_parent_wait_for_quorum ( int parent_fd_in, int parent_fd_out) { - char reply; cs_error_t err; struct pollfd pfds[2]; int poll_err; + if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) { + if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, 
SAM_CONFDB_S_Q_WAIT)) != CS_OK) { + goto error_reply; + } + } + /* * Update current quorum */ @@ -630,24 +928,44 @@ static cs_error_t sam_parent_wait_for_quorum ( } } - reply = SAM_REPLY_OK; - if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) { - err = CS_ERR_LIBRARY; - goto error_reply; + if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) { + if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_STARTED)) != CS_OK) { + goto error_reply; + } } - return (CS_OK); + return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out)); error_reply: - reply = SAM_REPLY_ERROR; - if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) { - return (CS_ERR_LIBRARY); + if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) { + sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_REGISTERED); } - if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) { - return (CS_ERR_LIBRARY); + + return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out)); +} + +static cs_error_t sam_parent_confdb_state_set ( + int parent_fd_in, + int parent_fd_out, + int state) +{ + cs_error_t err; + const char *state_s; + + if (state == 1) { + state_s = SAM_CONFDB_S_STARTED; + } else { + state_s = SAM_CONFDB_S_REGISTERED; } - return (err); + if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, state_s)) != CS_OK) { + goto error_reply; + } + + return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out)); + +error_reply: + return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out)); } static cs_error_t sam_parent_kill_child ( @@ -675,12 +993,26 @@ static cs_error_t sam_parent_kill_child ( return (CS_OK); } +static cs_error_t sam_parent_mark_child_failed ( + int *action, + pid_t child_pid) +{ + sam_recovery_policy_t recpol; + + recpol = sam_internal_data.recovery_policy; + + sam_internal_data.term_send = 1; + sam_internal_data.recovery_policy = SAM_RECOVERY_POLICY_QUIT | + 
(SAM_RP_MASK_C (recpol) ? SAM_RECOVERY_POLICY_CONFDB : 0) | + (SAM_RP_MASK_Q (recpol) ? SAM_RECOVERY_POLICY_QUORUM : 0); + + return (sam_parent_kill_child (action, child_pid)); +} static cs_error_t sam_parent_data_store ( int parent_fd_in, int parent_fd_out) { - char reply; char *user_data; ssize_t size; cs_error_t err; @@ -711,28 +1043,14 @@ static cs_error_t sam_parent_data_store ( goto free_error_reply; } - reply = SAM_REPLY_OK; - if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) { - err = CS_ERR_LIBRARY; - goto free_error_reply; - } - free (user_data); - return (CS_OK); + return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out)); free_error_reply: free (user_data); error_reply: - reply = SAM_REPLY_ERROR; - if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) { - return (CS_ERR_LIBRARY); - } - if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) { - return (CS_ERR_LIBRARY); - } - - return (err); + return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out)); } static enum sam_parent_action_t sam_parent_handler ( @@ -749,10 +1067,12 @@ static enum sam_parent_action_t sam_parent_handler ( struct pollfd pfds[2]; nfds_t nfds; cs_error_t err; + sam_recovery_policy_t recpol; status = 0; action = SAM_PARENT_ACTION_CONTINUE; + recpol = sam_internal_data.recovery_policy; while (action == SAM_PARENT_ACTION_CONTINUE) { pfds[0].fd = parent_fd_in; @@ -766,7 +1086,7 @@ static enum sam_parent_action_t sam_parent_handler ( time_interval = -1; } - if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) { + if (recpol & SAM_RECOVERY_POLICY_QUORUM) { pfds[nfds].fd = sam_internal_data.quorum_fd; pfds[nfds].events = POLLIN; pfds[nfds].revents = 0; @@ -820,6 +1140,10 @@ static enum sam_parent_action_t sam_parent_handler ( goto action_exit; } + if (recpol & SAM_RECOVERY_POLICY_CONFDB) { + sam_confdb_update_key (SAM_CONFDB_KEY_LAST_HC, NULL); + } + /* * We have read command */ @@ -829,13 
+1153,20 @@ static enum sam_parent_action_t sam_parent_handler ( /* * Not started yet */ - if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) { + if (recpol & SAM_RECOVERY_POLICY_QUORUM) { if (sam_parent_wait_for_quorum (parent_fd_in, parent_fd_out) != CS_OK) { continue; } } + if (recpol & SAM_RECOVERY_POLICY_CONFDB) { + if (sam_parent_confdb_state_set (parent_fd_in, + parent_fd_out, 1) != CS_OK) { + continue; + } + } + status = 1; } break; @@ -844,6 +1175,13 @@ static enum sam_parent_action_t sam_parent_handler ( /* * Started */ + if (recpol & SAM_RECOVERY_POLICY_CONFDB) { + if (sam_parent_confdb_state_set (parent_fd_in, + parent_fd_out, 0) != CS_OK) { + continue; + } + } + status = 0; } break; @@ -853,6 +1191,10 @@ static enum sam_parent_action_t sam_parent_handler ( case SAM_COMMAND_WARN_SIGNAL_SET: sam_parent_warn_signal_set (parent_fd_in, parent_fd_out); break; + case SAM_COMMAND_MARK_FAILED: + status = 1; + sam_parent_mark_child_failed (&action, child_pid); + break; } } /* if (pfds[0].revents != 0) */ @@ -882,13 +1224,25 @@ cs_error_t sam_register ( pid_t pid; int pipe_error; int pipe_fd_out[2], pipe_fd_in[2]; - enum sam_parent_action_t action; + enum sam_parent_action_t action, old_action; int child_status; + sam_recovery_policy_t recpol; if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_INITIALIZED) { return (CS_ERR_BAD_HANDLE); } + recpol = sam_internal_data.recovery_policy; + + if (recpol & SAM_RECOVERY_POLICY_CONFDB) { + /* + * Register to objdb + */ + if ((error = sam_confdb_register ()) != CS_OK) { + goto error_exit; + } + } + error = CS_OK; while (1) { @@ -905,6 +1259,12 @@ cs_error_t sam_register ( goto error_exit; } + if (recpol & SAM_RECOVERY_POLICY_CONFDB) { + if ((error = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_REGISTERED)) != CS_OK) { + goto error_exit; + } + } + sam_internal_data.instance_id++; sam_internal_data.term_send = 0; @@ -937,6 +1297,8 @@ cs_error_t sam_register ( 
sam_internal_data.am_i_child = 1; sam_internal_data.internal_status = SAM_INTERNAL_STATUS_REGISTERED; + pthread_mutex_init (&sam_internal_data.lock, NULL); + goto error_exit; } else { /* @@ -961,20 +1323,34 @@ cs_error_t sam_register ( while (waitpid (pid, &child_status, 0) == -1 && errno == EINTR) ; + old_action = action; + if (action == SAM_PARENT_ACTION_RECOVERY) { - if (sam_internal_data.recovery_policy == SAM_RECOVERY_POLICY_QUIT || - sam_internal_data.recovery_policy == SAM_RECOVERY_POLICY_QUORUM_QUIT) + if (SAM_RP_MASK (sam_internal_data.recovery_policy) == SAM_RECOVERY_POLICY_QUIT) action = SAM_PARENT_ACTION_QUIT; } + if (action == SAM_PARENT_ACTION_QUIT) { - if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) { + if (recpol & SAM_RECOVERY_POLICY_QUORUM) { quorum_finalize (sam_internal_data.quorum_handle); } + if (recpol & SAM_RECOVERY_POLICY_CONFDB) { + if (old_action == SAM_PARENT_ACTION_RECOVERY) { + /* + * Mark as failed + */ + sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_FAILED); + } else { + sam_confdb_destroy_pid_obj (); + } + } + exit (WEXITSTATUS (child_status)); } + } } diff --git a/man/Makefile.am b/man/Makefile.am index 27a12db..58923f0 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -116,6 +116,7 @@ dist_man_MANS = \ sam_hc_callback_register.3 \ sam_hc_send.3 \ sam_initialize.3 \ + sam_mark_failed.3 \ sam_overview.8 \ sam_register.3 \ sam_start.3 \ diff --git a/man/sam_initialize.3 b/man/sam_initialize.3 index 1043954..5a3334f 100644 --- a/man/sam_initialize.3 +++ b/man/sam_initialize.3 @@ -31,7 +31,7 @@ .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. 
.\" */ -.TH "SAM_INITIALIZE" 3 "30/04/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" +.TH "SAM_INITIALIZE" 3 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" .SH NAME .P @@ -71,6 +71,7 @@ The \fIrecovery_policy\fR is defined as type: SAM_RECOVERY_POLICY_QUORUM = 0x08, SAM_RECOVERY_POLICY_QUORUM_QUIT = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_QUIT, SAM_RECOVERY_POLICY_QUORUM_RESTART = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_RESTART, + SAM_RECOVERY_POLICY_CONFDB = 0x10, } sam_recovery_policy_t; .fi @@ -94,6 +95,9 @@ quorate and process will be terminated if quorum is lost. SAM_RECOVERY_POLICY_QUORUM_RESTART same as \fISAM_RECOVERY_POLICY_RESTART\fR but \fBsam_start (3)\fR will block until corosync becomes quorate and process will be restarted if quorum is lost. +.TP +SAM_RECOVERY_POLICY_CONFDB +is not policy. Used only as flag meaning confdb integration. It can be used with all previous policies. .P To perform event driven healthchecking, \fBsam_register(3)\fR and diff --git a/man/sam_mark_failed.3 b/man/sam_mark_failed.3 new file mode 100644 index 0000000..dabe2da --- /dev/null +++ b/man/sam_mark_failed.3 @@ -0,0 +1,73 @@ +.\"/* +.\" * Copyright (c) 2010 Red Hat, Inc. +.\" * +.\" * All rights reserved. +.\" * +.\" * Author: Jan Friesse (jfrie...@redhat.com) +.\" * +.\" * This software licensed under BSD license, the text of which follows: +.\" * +.\" * Redistribution and use in source and binary forms, with or without +.\" * modification, are permitted provided that the following conditions are met: +.\" * +.\" * - Redistributions of source code must retain the above copyright notice, +.\" * this list of conditions and the following disclaimer. +.\" * - Redistributions in binary form must reproduce the above copyright notice, +.\" * this list of conditions and the following disclaimer in the documentation +.\" * and/or other materials provided with the distribution. 
+.\" * - Neither the name of the Red Hat, Inc. nor the names of its +.\" * contributors may be used to endorse or promote products derived from this +.\" * software without specific prior written permission. +.\" * +.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +.\" * THE POSSIBILITY OF SUCH DAMAGE. +.\" */ +.TH "SAM_MARK_FAILED" 3 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" + +.SH NAME +.P +sam_mark_failed \- Mark process failed + +.SH SYNOPSIS +.P +\fB#include <corosync/sam.h>\fR + +.P +\fBcs_error_t sam_mark_failed (void);\fR + +.SH DESCRIPTION +.P +The \fBsam_mark_failed\fR function is used with SAM_RECOVERY_POLICY_CONFDB mostly +together with SAM_RECOVERY_POLICY_RESTART to mark process failed. Process marked +failed is killed without sending warn signal and control process will exit +as with SAM_RECOVERY_POLICY_QUIT policy. Confdb key state will be set to failed so +corosync watchdog can take required action. + +.SH RETURN VALUE +.P +This call returns CS_OK value if successful, otherwise an error is returned. 
+ +.SH ERRORS +.TP +CS_ERR_BAD_HANDLE +library was not initialized by calling \fBsam_initialize(3)\fR or was already finalized + +.TP +CS_ERR_INVALID_PARAM +recovery policy doesn't have SAM_RECOVERY_POLICY_CONFDB flag set + +.TP +CS_ERR_LIBRARY +some internal error appeared (communication with parent process) + +.SH "SEE ALSO" +.BR sam_initialize (3) diff --git a/man/sam_overview.8 b/man/sam_overview.8 index d521a8a..a5807cf 100644 --- a/man/sam_overview.8 +++ b/man/sam_overview.8 @@ -32,7 +32,7 @@ .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. .\" */ -.TH "SAM_OVERVIEW" 8 "30/04/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" +.TH "SAM_OVERVIEW" 8 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" .SH NAME .P @@ -133,6 +133,38 @@ One can in such case use files, databases, ... or much simpler in memory solutio presented by \fBsam_data_store(3)\fR, \fBsam_data_restore(3)\fR and \fBsam_data_getsize(3)\fR functions. +.SH Confdb integration +.P +SAM has policy flag used for confdb system integration (\fISAM_RECOVERY_POLICY_CONFDB\fR). +If process is registered with this flag, new confdb object PROCESS_NAME:PID is created with following +keys: +.RS +.IP \(bu 3 +\fIrecovery\fR - will be quit or restart depending on policy +.IP \(bu 3 +\fIhc_period\fR - period of health checking in milliseconds +.IP \(bu 3 +\fIhc_last\fR - last known GMT time in milliseconds when health check was received +.IP \(bu 3 +\fIstate\fR - state of process (can be one of registered, started, failed, waiting for quorum) +.RE + +.P +Object is automatically deleted if process exits with stopped health checking. + +.P +Confdb integration with corosync watchdog can be used in implicit and explicit way. + +.P +Implicit way is achieved by setting recovery policy to QUIT and let process exit with started health checking. 
+If this happened, object is not deleted and corosync watchdog will take required action. + +.P +Explicit way is usefull for situations, when developer can deal with some non-fatal fall of application. +This mode is achieved by setting policy to RESTART and using SAM same as without Confdb integration. +If real fail is needed (like too many restarts at all, per/sec, ...), it's possible to use \fBsam_mark_failed(3)\fR +and let corosync watchdog take required action. + .SH BUGS .SH "SEE ALSO" .BR sam_initialize (3), @@ -140,6 +172,7 @@ functions. .BR sam_data_restore (3), .BR sam_data_store (3), .BR sam_finalize (3), +.BR sam_mark_failed (3), .BR sam_start (3), .BR sam_stop (3), .BR sam_register (3), diff --git a/test/testsam.c b/test/testsam.c index 95d8e12..1972d9e 100644 --- a/test/testsam.c +++ b/test/testsam.c @@ -38,6 +38,7 @@ #include <config.h> +#include <limits.h> #include <sys/types.h> #include <stdio.h> #include <stdint.h> @@ -50,6 +51,8 @@ #include <string.h> #include <sys/wait.h> +extern const char *__progname; + static int test2_sig_delivered = 0; static int test5_hc_cb_count = 0; static int test6_sig_delivered = 0; @@ -864,9 +867,551 @@ static int test7 (void) { return (2); } +/* + * Test confdb integration + quit policy + */ +static int test8 (pid_t pid, pid_t old_pid, int test_n) { + confdb_handle_t cdb_handle; + cs_error_t err; + hdb_handle_t res_handle, proc_handle, pid_handle; + size_t value_len; + uint64_t tstamp1, tstamp2; + char key_value[256]; + unsigned int instance_id; + char tmp_obj[PATH_MAX]; + confdb_value_types_t cdbtype; + + err = confdb_initialize (&cdb_handle, NULL); + if (err != CS_OK) { + printf ("Could not initialize Cluster Configuration Database API instance error %d. 
Test skipped\n", err); + return (1); + } + + printf ("%s test %d\n", __FUNCTION__, test_n); + + if (test_n == 2) { + /* + * Object should not exist + */ + printf ("%s Testing if object exists (it shouldn't)\n", __FUNCTION__); + + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle); + if (err != CS_OK) { + printf ("Could not object_find \"resources\": %d.\n", err); + return (2); + } + + err = confdb_object_find_start(cdb_handle, res_handle); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle); + if (err != CS_OK) { + printf ("Could not object_find \"process\": %d.\n", err); + return (2); + } + + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); + } + + err = confdb_object_find_start(cdb_handle, proc_handle); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle); + if (err == CS_OK) { + printf ("Could find object \"%s\": %d.\n", tmp_obj, err); + return (2); + } + } + + if (test_n == 1 || test_n == 2) { + printf ("%s: initialize\n", __FUNCTION__); + err = sam_initialize (2000, SAM_RECOVERY_POLICY_QUIT | SAM_RECOVERY_POLICY_CONFDB); + if (err != CS_OK) { + fprintf (stderr, "Can't initialize SAM API. Error %d\n", err); + return 2; + } + + printf ("%s: register\n", __FUNCTION__); + err = sam_register (&instance_id); + if (err != CS_OK) { + fprintf (stderr, "Can't register. 
Error %d\n", err); + return 2; + } + + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle); + if (err != CS_OK) { + printf ("Could not object_find \"resources\": %d.\n", err); + return (2); + } + + err = confdb_object_find_start(cdb_handle, res_handle); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle); + if (err != CS_OK) { + printf ("Could not object_find \"process\": %d.\n", err); + return (2); + } + + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); + } + + err = confdb_object_find_start(cdb_handle, proc_handle); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle); + if (err != CS_OK) { + printf ("Could not object_find \"%s\": %d.\n", tmp_obj, err); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "recovery", strlen("recovery"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"recovery\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("quit") || memcmp (key_value, "quit", value_len) != 0) { + printf ("Recovery key \"%s\" is not \"watchdog\".\n", key_value); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("registered") || memcmp (key_value, "registered", value_len) != 0) { + printf ("State key is not \"registered\".\n"); + return (2); + } + + 
printf ("%s iid %d: start\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + fprintf (stderr, "Can't start hc. Error %d\n", err); + return 2; + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("started") || memcmp (key_value, "started", value_len) != 0) { + printf ("State key is not \"started\".\n"); + return (2); + } + + printf ("%s iid %d: stop\n", __FUNCTION__, instance_id); + err = sam_stop (); + if (err != CS_OK) { + fprintf (stderr, "Can't stop hc. Error %d\n", err); + return 2; + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("registered") || memcmp (key_value, "registered", value_len) != 0) { + printf ("State key is not \"registered\".\n"); + return (2); + } + + printf ("%s iid %d: sleeping 5\n", __FUNCTION__, instance_id); + sleep (5); + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("registered") || memcmp (key_value, "registered", value_len) != 0) { + printf ("State key is not \"registered\".\n"); + return (2); + } + + printf ("%s iid %d: start 2\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + fprintf (stderr, "Can't start hc. 
Error %d\n", err); + return 2; + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("started") || memcmp (key_value, "started", value_len) != 0) { + printf ("State key is not \"started\".\n"); + return (2); + } + + if (test_n == 2) { + printf ("%s iid %d: sleeping 5. Should be killed\n", __FUNCTION__, instance_id); + sleep (5); + + return (2); + } else { + printf ("%s iid %d: Test HC\n", __FUNCTION__, instance_id); + err = sam_hc_send (); + if (err != CS_OK) { + fprintf (stderr, "Can't send hc. Error %d\n", err); + return 2; + } + err = confdb_key_get_typed (cdb_handle, pid_handle, "hc_last", &tstamp1, &value_len, &cdbtype); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + printf ("%s iid %d: Sleep 1\n", __FUNCTION__, instance_id); + sleep (1); + err = sam_hc_send (); + if (err != CS_OK) { + fprintf (stderr, "Can't send hc. Error %d\n", err); + return 2; + } + sleep (1); + err = confdb_key_get_typed (cdb_handle, pid_handle, "hc_last", &tstamp2, &value_len, &cdbtype); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + if (tstamp2 - tstamp1 < 500 || tstamp2 - tstamp1 > 2000) { + printf ("Difference %d is not within <500, 2000> interval.\n", (int)(tstamp2 - tstamp1)); + return (2); + } + + printf ("%s iid %d: stop 2\n", __FUNCTION__, instance_id); + err = sam_stop (); + if (err != CS_OK) { + fprintf (stderr, "Can't stop hc. 
Error %d\n", err); + return 2; + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("registered") || memcmp (key_value, "registered", value_len) != 0) { + printf ("State key is not \"registered\".\n"); + return (2); + } + + printf ("%s iid %d: exiting\n", __FUNCTION__, instance_id); + return (0); + } + } + + if (test_n == 3) { + printf ("%s Testing if status is failed\n", __FUNCTION__); + + /* + * Previous should be FAILED + */ + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle); + if (err != CS_OK) { + printf ("Could not object_find \"resources\": %d.\n", err); + return (2); + } + + err = confdb_object_find_start(cdb_handle, res_handle); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle); + if (err != CS_OK) { + printf ("Could not object_find \"process\": %d.\n", err); + return (2); + } + + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); + } + + err = confdb_object_find_start(cdb_handle, proc_handle); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle); + if (err != CS_OK) { + printf ("Could not object_find \"%s\": %d.\n", tmp_obj, err); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", 
err); + return (2); + } + + if (value_len != strlen ("failed") || memcmp (key_value, "failed", value_len) != 0) { + printf ("State key is not \"failed\".\n"); + return (2); + } + + return (0); + } + + return (2); +} + +/* + * Test confdb integration + restart policy + */ +static int test9 (pid_t pid, pid_t old_pid, int test_n) { + confdb_handle_t cdb_handle; + cs_error_t err; + hdb_handle_t res_handle, proc_handle, pid_handle; + size_t value_len; + char key_value[256]; + unsigned int instance_id; + char tmp_obj[PATH_MAX]; + + err = confdb_initialize (&cdb_handle, NULL); + if (err != CS_OK) { + printf ("Could not initialize Cluster Configuration Database API instance error %d. Test skipped\n", err); + return (1); + } + + printf ("%s test %d\n", __FUNCTION__, test_n); + + if (test_n == 1) { + printf ("%s: initialize\n", __FUNCTION__); + err = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART | SAM_RECOVERY_POLICY_CONFDB); + if (err != CS_OK) { + fprintf (stderr, "Can't initialize SAM API. Error %d\n", err); + return 2; + } + + printf ("%s: register\n", __FUNCTION__); + err = sam_register (&instance_id); + if (err != CS_OK) { + fprintf (stderr, "Can't register. 
Error %d\n", err); + return 2; + } + printf ("%s: iid %d\n", __FUNCTION__, instance_id); + + if (instance_id < 3) { + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), + &res_handle); + if (err != CS_OK) { + printf ("Could not object_find \"resources\": %d.\n", err); + return (2); + } + + err = confdb_object_find_start(cdb_handle, res_handle); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle); + if (err != CS_OK) { + printf ("Could not object_find \"process\": %d.\n", err); + return (2); + } + + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); + } + + err = confdb_object_find_start(cdb_handle, proc_handle); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle); + if (err != CS_OK) { + printf ("Could not object_find \"%s\": %d.\n", tmp_obj, err); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "recovery", strlen("recovery"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"recovery\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("restart") || memcmp (key_value, "restart", value_len) != 0) { + printf ("Recovery key \"%s\" is not \"restart\".\n", key_value); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("registered") || memcmp (key_value, "registered", 
value_len) != 0) { + printf ("State key is not \"registered\".\n"); + return (2); + } + + printf ("%s iid %d: start\n", __FUNCTION__, instance_id); + err = sam_start (); + if (err != CS_OK) { + fprintf (stderr, "Can't start hc. Error %d\n", err); + return 2; + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("started") || memcmp (key_value, "started", value_len) != 0) { + printf ("State key is not \"started\".\n"); + return (2); + } + + printf ("%s iid %d: waiting for kill\n", __FUNCTION__, instance_id); + sleep (10); + + return (2); + } + + if (instance_id == 3) { + printf ("%s iid %d: mark failed\n", __FUNCTION__, instance_id); + if (err != CS_OK) { + fprintf (stderr, "Can't start hc. Error %d\n", err); + return 2; + } + err = sam_mark_failed (); + if (err != CS_OK) { + fprintf (stderr, "Can't mark failed. Error %d\n", err); + return 2; + } + + sleep (10); + + return (2); + } + + return (2); + } + + if (test_n == 2) { + printf ("%s Testing if status is failed\n", __FUNCTION__); + + /* + * Previous should be FAILED + */ + err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle); + if (err != CS_OK) { + printf ("Could not object_find \"resources\": %d.\n", err); + return (2); + } + + err = confdb_object_find_start(cdb_handle, res_handle); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle); + if (err != CS_OK) { + printf ("Could not object_find \"process\": %d.\n", err); + return (2); + } + + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", 
__progname, pid) >= sizeof (tmp_obj)) { + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); + } + + err = confdb_object_find_start(cdb_handle, proc_handle); + if (err != CS_OK) { + printf ("Could not start object_find %d.\n", err); + return (2); + } + + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle); + if (err != CS_OK) { + printf ("Could not object_find \"%s\": %d.\n", tmp_obj, err); + return (2); + } + + err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len); + if (err != CS_OK) { + printf ("Could not get \"state\" key: %d.\n", err); + return (2); + } + + if (value_len != strlen ("failed") || memcmp (key_value, "failed", value_len) != 0) { + printf ("State key is not \"failed\".\n"); + return (2); + } + + return (0); + } + + return (2); +} + int main(int argc, char *argv[]) { - pid_t pid; + pid_t pid, old_pid; int err; int stat; int all_passed = 1; @@ -990,7 +1535,7 @@ int main(int argc, char *argv[]) if (pid == -1) { fprintf (stderr, "Can't fork\n"); - return 1; + return 2; } if (pid == 0) { @@ -1003,6 +1548,100 @@ int main(int argc, char *argv[]) fprintf (stderr, "test7 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? 
"skipped" : "failed"))); if (WEXITSTATUS (stat) == 1) no_skipped++; + if (WEXITSTATUS (stat) > 1) + all_passed = 0; + + pid = fork (); + + if (pid == -1) { + fprintf (stderr, "Can't fork\n"); + return 2; + } + + if (pid == 0) { + err = test8 (getpid (), 0, 1); + sam_finalize (); + return (err); + } + + waitpid (pid, &stat, 0); + old_pid = pid; + + if (WEXITSTATUS (stat) == 0) { + pid = fork (); + + if (pid == -1) { + fprintf (stderr, "Can't fork\n"); + return 2; + } + + if (pid == 0) { + err = test8 (getpid (), old_pid, 2); + sam_finalize (); + return (err); + } + + waitpid (pid, &stat, 0); + old_pid = pid; + + if (WEXITSTATUS (stat) == 0) { + pid = fork (); + + if (pid == -1) { + fprintf (stderr, "Can't fork\n"); + return 2; + } + + if (pid == 0) { + err = test8 (old_pid, 0, 3); + sam_finalize (); + return (err); + } + + waitpid (pid, &stat, 0); + } + } + + if (WEXITSTATUS (stat) == 1) + no_skipped++; + if (WEXITSTATUS (stat) > 1) + all_passed = 0; + + pid = fork (); + + if (pid == -1) { + fprintf (stderr, "Can't fork\n"); + return 2; + } + + if (pid == 0) { + err = test9 (getpid (), 0, 1); + sam_finalize (); + return (err); + } + + waitpid (pid, &stat, 0); + old_pid = pid; + + if (WEXITSTATUS (stat) == 0) { + pid = fork (); + + if (pid == -1) { + fprintf (stderr, "Can't fork\n"); + return 2; + } + + if (pid == 0) { + err = test9 (old_pid, 0, 2); + sam_finalize (); + return (err); + } + + waitpid (pid, &stat, 0); + } + fprintf (stderr, "test9 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? "skipped" : "failed"))); + if (WEXITSTATUS (stat) == 1) + no_skipped++; if (WEXITSTATUS (stat) > 1) all_passed = 0; -- 1.7.2.2 _______________________________________________ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais