On 09/23/2010 06:55 PM, Angus Salkeld wrote: > From: Jan Friesse<jfrie...@redhat.com> > > Patch add support for Confdb integration with SAM. It's now possible to > use SAM_RECOVERY_POLICY_CONFDB as flag to previous policies. > > Also new function sam_mark_failed is added for ability to use RECOVERY > policy together with confdb and get expected results (specially with > integration with corosync watchdog) > > Signed-off-by: Angus Salkeld<asalk...@redhat.com> > Signed-off-by: Jan Friesse<jfrie...@redhat.com> > Signed-off-by: Angus Salkeld<asalk...@redhat.com> > --- > cts/agents/Makefile.am | 2 +- > include/corosync/sam.h | 13 + > lib/Makefile.am | 2 +- > lib/libsam.verso | 2 +- > lib/sam.c | 508 +++++++++++++++++++++++++++++++++----- > man/Makefile.am | 1 + > man/sam_initialize.3 | 6 +- > man/sam_mark_failed.3 | 73 ++++++ > man/sam_overview.8 | 35 +++- > test/testsam.c | 643 > +++++++++++++++++++++++++++++++++++++++++++++++- > 10 files changed, 1212 insertions(+), 73 deletions(-) > create mode 100644 man/sam_mark_failed.3 > > diff --git a/cts/agents/Makefile.am b/cts/agents/Makefile.am > index c1a7f85..1d73a9b 100644 > --- a/cts/agents/Makefile.am > +++ b/cts/agents/Makefile.am > @@ -66,7 +66,7 @@ confdb_test_agent_LDADD = -lconfdb -lcoroipcc > ../../exec/coropoll.o > confdb_test_agent_LDFLAGS = -L../../lib > > sam_test_agent_SOURCES = sam_test_agent.c common_test_agent.c > -sam_test_agent_LDADD = -lsam -lquorum -lcoroipcc ../../exec/coropoll.o > +sam_test_agent_LDADD = -lsam -lquorum -lcoroipcc -lconfdb > ../../exec/coropoll.o > sam_test_agent_LDFLAGS = -L../../lib > > votequorum_test_agent_SOURCES = votequorum_test_agent.c common_test_agent.c > diff --git a/include/corosync/sam.h b/include/corosync/sam.h > index 41727c2..30401a0 100644 > --- a/include/corosync/sam.h > +++ b/include/corosync/sam.h > @@ -46,6 +46,7 @@ typedef enum { > SAM_RECOVERY_POLICY_QUORUM = 0x08, > SAM_RECOVERY_POLICY_QUORUM_QUIT = SAM_RECOVERY_POLICY_QUORUM | > SAM_RECOVERY_POLICY_QUIT, > 
SAM_RECOVERY_POLICY_QUORUM_RESTART = SAM_RECOVERY_POLICY_QUORUM | > SAM_RECOVERY_POLICY_RESTART, > + SAM_RECOVERY_POLICY_CONFDB = 0x10, > } sam_recovery_policy_t; > > /* > @@ -205,6 +206,18 @@ cs_error_t sam_data_store ( > const void *data, > size_t size); > > +/* > + * Marks child as failed. This can be called only with > SAM_RECOVERY_POLICY_CONFDB flag set and > + * makes sense only for SAM_RECOVERY_POLICY_RESTART. This will kill child > without sending warn > + * signal. Confdb state key will be set to failed. > + * > + * - CS_OK in case no problem appeared > + * - CS_ERR_BAD_HANDLE library was not initialized or was already finalized > + * - CS_ERR_INVALID_PARAM recovery policy doesn't has > SAM_RECOVERY_POLICY_CONFDB flag set > + * - CS_ERR_LIBRARY if some internal error appeared (communication with > parent > + * process) > + */ > +cs_error_t sam_mark_failed (void); > > #ifdef __cplusplus > } > diff --git a/lib/Makefile.am b/lib/Makefile.am > index 7aca53b..c4ee3c0 100644 > --- a/lib/Makefile.am > +++ b/lib/Makefile.am > @@ -62,7 +62,7 @@ libvotequorum_a_SOURCES = votequorum.c > libconfdb_a_SOURCES = confdb.c sa-confdb.c > libconfdb_a_LIBADD = ../lcr/lcr_ifact.o > CONFDB_LINKER_ADD = $(OS_DYFLAGS) $(OS_LDL) > -SAM_LINKER_ADD = -L. -lquorum > +SAM_LINKER_ADD = -L. 
-lquorum -lconfdb > libcoroipcc_a_SOURCES = coroipcc.c > libsam_a_SOURCES = sam.c > > diff --git a/lib/libsam.verso b/lib/libsam.verso > index 8089590..fdc6698 100644 > --- a/lib/libsam.verso > +++ b/lib/libsam.verso > @@ -1 +1 @@ > -4.3.0 > +4.4.0 > diff --git a/lib/sam.c b/lib/sam.c > index a3d1cd0..53020ac 100644 > --- a/lib/sam.c > +++ b/lib/sam.c > @@ -42,6 +42,7 @@ > #include<stdlib.h> > #include<string.h> > #include<unistd.h> > +#include<sys/time.h> > #include<sys/types.h> > #include<sys/socket.h> > #include<errno.h> > @@ -50,6 +51,7 @@ > #include<corosync/coroipc_types.h> > #include<corosync/coroipcc.h> > #include<corosync/corodefs.h> > +#include<corosync/confdb.h> > #include<corosync/hdb.h> > #include<corosync/quorum.h> > > @@ -61,6 +63,15 @@ > #include<sys/wait.h> > #include<signal.h> > > +#define SAM_CONFDB_S_FAILED "failed" > +#define SAM_CONFDB_S_REGISTERED "registered" > +#define SAM_CONFDB_S_STARTED "started" > +#define SAM_CONFDB_S_Q_WAIT "waiting for quorum" > + > +#define SAM_RP_MASK_Q(pol) (pol& (~SAM_RECOVERY_POLICY_QUORUM)) > +#define SAM_RP_MASK_C(pol) (pol& (~SAM_RECOVERY_POLICY_CONFDB)) > +#define SAM_RP_MASK(pol) (pol& (~(SAM_RECOVERY_POLICY_QUORUM | > SAM_RECOVERY_POLICY_CONFDB))) > + > enum sam_internal_status_t { > SAM_INTERNAL_STATUS_NOT_INITIALIZED = 0, > SAM_INTERNAL_STATUS_INITIALIZED, > @@ -75,6 +86,7 @@ enum sam_command_t { > SAM_COMMAND_HB, > SAM_COMMAND_DATA_STORE, > SAM_COMMAND_WARN_SIGNAL_SET, > + SAM_COMMAND_MARK_FAILED, > }; > > enum sam_reply_t { > @@ -89,6 +101,13 @@ enum sam_parent_action_t { > SAM_PARENT_ACTION_CONTINUE > }; > > +enum sam_confdb_key_t { > + SAM_CONFDB_KEY_RECOVERY, > + SAM_CONFDB_KEY_HC_PERIOD, > + SAM_CONFDB_KEY_LAST_HC, > + SAM_CONFDB_KEY_STATE, > +}; > + > static struct { > int time_interval; > sam_recovery_policy_t recovery_policy; > @@ -109,11 +128,156 @@ static struct { > size_t user_data_size; > size_t user_data_allocated; > > + pthread_mutex_t lock; > + > quorum_handle_t quorum_handle; > uint32_t 
quorate; > int quorum_fd; > + > + confdb_handle_t confdb_handle; > + hdb_handle_t confdb_pid_handle; > } sam_internal_data; > > +extern const char *__progname; > + > +static cs_error_t sam_confdb_update_key (enum sam_confdb_key_t key, const > char *value) > +{ > + cs_error_t err; > + const char *svalue; > + uint64_t hc_period, last_hc; > + struct timeval tv; > + const char *ssvalue[] = { [SAM_RECOVERY_POLICY_QUIT] = "quit", > [SAM_RECOVERY_POLICY_RESTART] = "restart" }; > + > + switch (key) { > + case SAM_CONFDB_KEY_RECOVERY: > + svalue = ssvalue[SAM_RP_MASK > (sam_internal_data.recovery_policy)]; > + > + if ((err = confdb_key_create_typed > (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle, > + "recovery", svalue, strlen ((const char *)svalue), > CONFDB_VALUETYPE_STRING)) != CS_OK) { > + goto exit_error; > + } > + break; > + case SAM_CONFDB_KEY_HC_PERIOD: > + hc_period = sam_internal_data.time_interval; > + > + if ((err = confdb_key_create_typed > (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle, > + "hc_period",&hc_period, sizeof (uint64_t), > CONFDB_VALUETYPE_UINT64)) != CS_OK) { > + goto exit_error; > + } > + break; > + case SAM_CONFDB_KEY_LAST_HC: > + if (gettimeofday (&tv, NULL) == -1) { > + last_hc = 0; > + } else { > + last_hc = ((uint64_t)tv.tv_sec * 1000) + > ((uint64_t)tv.tv_usec / 1000); > + } > + > + if ((err = confdb_key_create_typed > (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle, > + "hc_last",&last_hc, sizeof (uint64_t), > CONFDB_VALUETYPE_UINT64)) != CS_OK) { > + goto exit_error; > + } > + break; > + case SAM_CONFDB_KEY_STATE: > + svalue = value; > + if ((err = confdb_key_create_typed > (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle, > + "state", svalue, strlen ((const char *)svalue), > CONFDB_VALUETYPE_STRING)) != CS_OK) { > + goto exit_error; > + } > + break; > + } > + > + return (CS_OK); > + > +exit_error: > + return (err); > +} > + > +static cs_error_t 
sam_confdb_destroy_pid_obj (void) > +{ > + return (confdb_object_destroy (sam_internal_data.confdb_handle, > sam_internal_data.confdb_pid_handle)); > +} > + > +static cs_error_t sam_confdb_register (void) > +{ > + const char *obj_name; > + cs_error_t err; > + confdb_handle_t confdb_handle; > + hdb_handle_t resource_handle, process_handle, pid_handle, obj_handle; > + hdb_handle_t *res_handle; > + char tmp_obj[PATH_MAX]; > + int i; > + > + if ((err = confdb_initialize (&confdb_handle, NULL)) != CS_OK) { > + return (err); > + } > + > + for (i = 0; i< 3; i++) { > + switch (i) { > + case 0: > + obj_name = "resources"; > + obj_handle = OBJECT_PARENT_HANDLE; > + res_handle =&resource_handle; > + break; > + case 1: > + obj_name = "process"; > + obj_handle = resource_handle; > + res_handle =&process_handle; > + break; > + case 2: > + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", > __progname, getpid ())>= sizeof (tmp_obj)) { > + snprintf (tmp_obj, sizeof (tmp_obj), "%d", > getpid ()); > + } > + > + obj_name = tmp_obj; > + obj_handle = process_handle; > + res_handle =&pid_handle; > + break; > + } > + > + if ((err = confdb_object_find_start (confdb_handle, > obj_handle)) != CS_OK) { > + goto finalize_error; > + } > + > + if ((err = confdb_object_find (confdb_handle, obj_handle, > obj_name, strlen (obj_name), > + res_handle)) != CS_OK) { > + if (err == CONFDB_ERR_ACCESS) { > + /* > + * Try to create object > + */ > + if ((err = confdb_object_create (confdb_handle, > obj_handle, obj_name, > + strlen (obj_name), res_handle)) != > CS_OK) { > + goto finalize_error; > + } > + } else { > + goto finalize_error; > + } > + } else { > + if ((err = confdb_object_find_destroy (confdb_handle, > obj_handle)) != CS_OK) { > + goto finalize_error; > + } > + } > + } > + > + sam_internal_data.confdb_pid_handle = pid_handle; > + sam_internal_data.confdb_handle = confdb_handle; > + > + if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_RECOVERY, NULL)) != > CS_OK) { > + goto 
destroy_finalize_error; > + } > + > + if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_HC_PERIOD, NULL)) != > CS_OK) { > + goto destroy_finalize_error; > + } > + > + return (CS_OK); > + > +destroy_finalize_error: > + sam_confdb_destroy_pid_obj (); > +finalize_error: > + confdb_finalize (confdb_handle); > + return (err); > +} > + > static void quorum_notification_fn ( > quorum_handle_t handle, > uint32_t quorate, > @@ -135,8 +299,8 @@ cs_error_t sam_initialize ( > return (CS_ERR_BAD_HANDLE); > } > > - if (recovery_policy != SAM_RECOVERY_POLICY_QUIT&& recovery_policy != > SAM_RECOVERY_POLICY_RESTART&& > - recovery_policy != SAM_RECOVERY_POLICY_QUORUM_QUIT&& > recovery_policy != SAM_RECOVERY_POLICY_QUORUM_RESTART) { > + if (SAM_RP_MASK (recovery_policy) != SAM_RECOVERY_POLICY_QUIT&& > + SAM_RP_MASK (recovery_policy) != SAM_RECOVERY_POLICY_RESTART) { > return (CS_ERR_INVALID_PARAM); > } > > @@ -178,6 +342,8 @@ cs_error_t sam_initialize ( > sam_internal_data.user_data_size = 0; > sam_internal_data.user_data_allocated = 0; > > + pthread_mutex_init (&sam_internal_data.lock, NULL); > + > return (CS_OK); > > exit_error_quorum: > @@ -290,8 +456,12 @@ cs_error_t sam_data_getsize (size_t *size) > return (CS_ERR_BAD_HANDLE); > } > > + pthread_mutex_lock (&sam_internal_data.lock); > + > *size = sam_internal_data.user_data_size; > > + pthread_mutex_unlock (&sam_internal_data.lock); > + > return (CS_OK); > } > > @@ -299,6 +469,10 @@ cs_error_t sam_data_restore ( > void *data, > size_t size) > { > + cs_error_t err; > + > + err = CS_OK; > + > if (data == NULL) { > return (CS_ERR_INVALID_PARAM); > } > @@ -310,17 +484,30 @@ cs_error_t sam_data_restore ( > return (CS_ERR_BAD_HANDLE); > } > > + pthread_mutex_lock (&sam_internal_data.lock); > + > if (sam_internal_data.user_data_size == 0) { > - return (CS_OK); > + err = CS_OK; > + > + goto error_unlock; > } > > if (size< sam_internal_data.user_data_size) { > - return (CS_ERR_INVALID_PARAM); > + err = CS_ERR_INVALID_PARAM; > + > + goto 
error_unlock; > } > > memcpy (data, sam_internal_data.user_data, > sam_internal_data.user_data_size); > > + pthread_mutex_unlock (&sam_internal_data.lock); > + > return (CS_OK); > + > +error_unlock: > + pthread_mutex_unlock (&sam_internal_data.lock); > + > + return (err); > } > > cs_error_t sam_data_store ( > @@ -343,28 +530,36 @@ cs_error_t sam_data_store ( > size = 0; > } > > + pthread_mutex_lock (&sam_internal_data.lock); > + > if (sam_internal_data.am_i_child) { > /* > * We are child so we must send data to parent > */ > command = SAM_COMMAND_DATA_STORE; > if (sam_safe_write (sam_internal_data.child_fd_out,&command, > sizeof (command)) != sizeof (command)) { > - return (CS_ERR_LIBRARY); > + err = CS_ERR_LIBRARY; > + > + goto error_unlock; > } > > if (sam_safe_write (sam_internal_data.child_fd_out,&size, > sizeof (size)) != sizeof (size)) { > - return (CS_ERR_LIBRARY); > + err = CS_ERR_LIBRARY; > + > + goto error_unlock; > } > > if (data != NULL&& sam_safe_write > (sam_internal_data.child_fd_out, data, size) != size) { > - return (CS_ERR_LIBRARY); > + err = CS_ERR_LIBRARY; > + > + goto error_unlock; > } > > /* > * And wait for reply > */ > if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != > CS_OK) { > - return (err); > + goto error_unlock; > } > } > > @@ -379,7 +574,9 @@ cs_error_t sam_data_store ( > } else { > if (sam_internal_data.user_data_allocated< size) { > if ((new_data = realloc (sam_internal_data.user_data, > size)) == NULL) { > - return (CS_ERR_NO_MEMORY); > + err = CS_ERR_NO_MEMORY; > + > + goto error_unlock; > } > > sam_internal_data.user_data_allocated = size; > @@ -392,30 +589,53 @@ cs_error_t sam_data_store ( > memcpy (sam_internal_data.user_data, data, size); > } > > + pthread_mutex_unlock (&sam_internal_data.lock); > + > return (CS_OK); > + > +error_unlock: > + pthread_mutex_unlock (&sam_internal_data.lock); > + > + return (err); > } > > cs_error_t sam_start (void) > { > char command; > cs_error_t err; > + sam_recovery_policy_t 
recpol; > > if (sam_internal_data.internal_status != > SAM_INTERNAL_STATUS_REGISTERED) { > return (CS_ERR_BAD_HANDLE); > } > > + recpol = sam_internal_data.recovery_policy; > + > + if (recpol& SAM_RECOVERY_POLICY_QUORUM || recpol& > SAM_RECOVERY_POLICY_CONFDB) { > + pthread_mutex_lock (&sam_internal_data.lock); > + } > + > command = SAM_COMMAND_START; > > - if (sam_safe_write (sam_internal_data.child_fd_out,&command, sizeof > (command)) != sizeof (command)) > + if (sam_safe_write (sam_internal_data.child_fd_out,&command, sizeof > (command)) != sizeof (command)) { > + if (recpol& SAM_RECOVERY_POLICY_QUORUM || recpol& > SAM_RECOVERY_POLICY_CONFDB) { > + pthread_mutex_unlock (&sam_internal_data.lock); > + } > + > return (CS_ERR_LIBRARY); > + } > > - if (sam_internal_data.recovery_policy& SAM_RECOVERY_POLICY_QUORUM) { > + if (recpol& SAM_RECOVERY_POLICY_QUORUM || recpol& > SAM_RECOVERY_POLICY_CONFDB) { > /* > * Wait for parent reply > */ > if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != > CS_OK) { > + pthread_mutex_unlock (&sam_internal_data.lock); > + > return (err); > } > + > + pthread_mutex_unlock (&sam_internal_data.lock); > } > > if (sam_internal_data.hc_callback) > @@ -430,6 +650,7 @@ cs_error_t sam_start (void) > cs_error_t sam_stop (void) > { > char command; > + cs_error_t err; > > if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) { > return (CS_ERR_BAD_HANDLE); > @@ -437,8 +658,30 @@ cs_error_t sam_stop (void) > > command = SAM_COMMAND_STOP; > > - if (sam_safe_write (sam_internal_data.child_fd_out,&command, sizeof > (command)) != sizeof (command)) > + if (sam_internal_data.recovery_policy& SAM_RECOVERY_POLICY_CONFDB) { > + pthread_mutex_lock (&sam_internal_data.lock); > + } > + > + if (sam_safe_write (sam_internal_data.child_fd_out,&command, sizeof > (command)) != sizeof (command)) { > + if (sam_internal_data.recovery_policy& > SAM_RECOVERY_POLICY_CONFDB) { > + pthread_mutex_unlock (&sam_internal_data.lock); > + } > + > return 
(CS_ERR_LIBRARY); > + } > + > + if (sam_internal_data.recovery_policy& SAM_RECOVERY_POLICY_CONFDB) { > + /* > + * Wait for parent reply > + */ > + if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != > CS_OK) { > + pthread_mutex_unlock (&sam_internal_data.lock); > + > + return (err); > + } > + > + pthread_mutex_unlock (&sam_internal_data.lock); > + } > > if (sam_internal_data.hc_callback) > if (sam_safe_write (sam_internal_data.cb_wpipe_fd,&command, > sizeof (command)) != sizeof (command)) > @@ -489,6 +732,26 @@ exit_error: > return (CS_OK); > } > > +cs_error_t sam_mark_failed (void) > +{ > + char command; > + > + if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED&& > + sam_internal_data.internal_status != > SAM_INTERNAL_STATUS_REGISTERED) { > + return (CS_ERR_BAD_HANDLE); > + } > + > + if (!(sam_internal_data.recovery_policy& SAM_RECOVERY_POLICY_CONFDB)) { > + return (CS_ERR_INVALID_PARAM); > + } > + > + command = SAM_COMMAND_MARK_FAILED; > + > + if (sam_safe_write (sam_internal_data.child_fd_out,&command, sizeof > (command)) != sizeof (command)) > + return (CS_ERR_LIBRARY); > + > + return (CS_OK); > +} > > cs_error_t sam_warn_signal_set (int warn_signal) > { > @@ -501,25 +764,31 @@ cs_error_t sam_warn_signal_set (int warn_signal) > return (CS_ERR_BAD_HANDLE); > } > > + pthread_mutex_lock (&sam_internal_data.lock); > + > if (sam_internal_data.am_i_child) { > /* > * We are child so we must send data to parent > */ > command = SAM_COMMAND_WARN_SIGNAL_SET; > if (sam_safe_write (sam_internal_data.child_fd_out,&command, > sizeof (command)) != sizeof (command)) { > - return (CS_ERR_LIBRARY); > + err = CS_ERR_LIBRARY; > + > + goto error_unlock; > } > > if (sam_safe_write > (sam_internal_data.child_fd_out,&warn_signal, sizeof (warn_signal)) != > sizeof (warn_signal)) { > - return (CS_ERR_LIBRARY); > + err = CS_ERR_LIBRARY; > + > + goto error_unlock; > } > > /* > * And wait for reply > */ > if ((err = sam_read_reply 
(sam_internal_data.child_fd_in)) != > CS_OK) { > - return (err); > + goto error_unlock; > } > } > > @@ -528,14 +797,51 @@ cs_error_t sam_warn_signal_set (int warn_signal) > */ > sam_internal_data.warn_signal = warn_signal; > > + pthread_mutex_unlock (&sam_internal_data.lock); > + > return (CS_OK); > + > +error_unlock: > + pthread_mutex_unlock (&sam_internal_data.lock); > + > + return (err); > } > > -static cs_error_t sam_parent_warn_signal_set ( > +static cs_error_t sam_parent_reply_send ( > + cs_error_t err, > int parent_fd_in, > int parent_fd_out) > { > char reply; > + > + if (err == CS_OK) { > + reply = SAM_REPLY_OK; > + > + if (sam_safe_write (parent_fd_out,&reply, sizeof (reply)) != > sizeof (reply)) { > + err = CS_ERR_LIBRARY; > + goto error_reply; > + } > + > + return (CS_OK); > + } > + > +error_reply: > + reply = SAM_REPLY_ERROR; > + if (sam_safe_write (parent_fd_out,&reply, sizeof (reply)) != sizeof > (reply)) { > + return (CS_ERR_LIBRARY); > + } > + if (sam_safe_write (parent_fd_out,&err, sizeof (err)) != sizeof (err)) { > + return (CS_ERR_LIBRARY); > + } > + > + return (err); > +} > + > + > +static cs_error_t sam_parent_warn_signal_set ( > + int parent_fd_in, > + int parent_fd_out) > +{ > char *user_data; > int warn_signal; > cs_error_t err; > @@ -553,35 +859,27 @@ static cs_error_t sam_parent_warn_signal_set ( > goto error_reply; > } > > - reply = SAM_REPLY_OK; > - if (sam_safe_write (parent_fd_out,&reply, sizeof (reply)) != sizeof > (reply)) { > - err = CS_ERR_LIBRARY; > - goto error_reply; > - } > > - return (CS_OK); > + return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out)); > > error_reply: > - reply = SAM_REPLY_ERROR; > - if (sam_safe_write (parent_fd_out,&reply, sizeof (reply)) != sizeof > (reply)) { > - return (CS_ERR_LIBRARY); > - } > - if (sam_safe_write (parent_fd_out,&err, sizeof (err)) != sizeof (err)) { > - return (CS_ERR_LIBRARY); > - } > - > - return (err); > + return (sam_parent_reply_send (err, parent_fd_in, 
parent_fd_out)); > } > > static cs_error_t sam_parent_wait_for_quorum ( > int parent_fd_in, > int parent_fd_out) > { > - char reply; > cs_error_t err; > struct pollfd pfds[2]; > int poll_err; > > + if (sam_internal_data.recovery_policy& SAM_RECOVERY_POLICY_CONFDB) { > + if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, > SAM_CONFDB_S_Q_WAIT)) != CS_OK) { > + goto error_reply; > + } > + } > + > /* > * Update current quorum > */ > @@ -630,24 +928,44 @@ static cs_error_t sam_parent_wait_for_quorum ( > } > } > > - reply = SAM_REPLY_OK; > - if (sam_safe_write (parent_fd_out,&reply, sizeof (reply)) != sizeof > (reply)) { > - err = CS_ERR_LIBRARY; > - goto error_reply; > + if (sam_internal_data.recovery_policy& SAM_RECOVERY_POLICY_CONFDB) { > + if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, > SAM_CONFDB_S_STARTED)) != CS_OK) { > + goto error_reply; > + } > } > > - return (CS_OK); > + return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out)); > > error_reply: > - reply = SAM_REPLY_ERROR; > - if (sam_safe_write (parent_fd_out,&reply, sizeof (reply)) != sizeof > (reply)) { > - return (CS_ERR_LIBRARY); > + if (sam_internal_data.recovery_policy& SAM_RECOVERY_POLICY_CONFDB) { > + sam_confdb_update_key (SAM_CONFDB_KEY_STATE, > SAM_CONFDB_S_REGISTERED); > } > - if (sam_safe_write (parent_fd_out,&err, sizeof (err)) != sizeof (err)) { > - return (CS_ERR_LIBRARY); > + > + return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out)); > +} > + > +static cs_error_t sam_parent_confdb_state_set ( > + int parent_fd_in, > + int parent_fd_out, > + int state) > +{ > + cs_error_t err; > + const char *state_s; > + > + if (state == 1) { > + state_s = SAM_CONFDB_S_STARTED; > + } else { > + state_s = SAM_CONFDB_S_REGISTERED; > } > > - return (err); > + if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, state_s)) != > CS_OK) { > + goto error_reply; > + } > + > + return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out)); > + > +error_reply: > + return 
(sam_parent_reply_send (err, parent_fd_in, parent_fd_out)); > } > > static cs_error_t sam_parent_kill_child ( > @@ -675,12 +993,26 @@ static cs_error_t sam_parent_kill_child ( > return (CS_OK); > } > > +static cs_error_t sam_parent_mark_child_failed ( > + int *action, > + pid_t child_pid) > +{ > + sam_recovery_policy_t recpol; > + > + recpol = sam_internal_data.recovery_policy; > + > + sam_internal_data.term_send = 1; > + sam_internal_data.recovery_policy = SAM_RECOVERY_POLICY_QUIT | > + (SAM_RP_MASK_C (recpol) ? SAM_RECOVERY_POLICY_CONFDB : 0) | > + (SAM_RP_MASK_Q (recpol) ? SAM_RECOVERY_POLICY_QUORUM : 0); > + > + return (sam_parent_kill_child (action, child_pid)); > +} > > static cs_error_t sam_parent_data_store ( > int parent_fd_in, > int parent_fd_out) > { > - char reply; > char *user_data; > ssize_t size; > cs_error_t err; > @@ -711,28 +1043,14 @@ static cs_error_t sam_parent_data_store ( > goto free_error_reply; > } > > - reply = SAM_REPLY_OK; > - if (sam_safe_write (parent_fd_out,&reply, sizeof (reply)) != sizeof > (reply)) { > - err = CS_ERR_LIBRARY; > - goto free_error_reply; > - } > - > free (user_data); > > - return (CS_OK); > + return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out)); > > free_error_reply: > free (user_data); > error_reply: > - reply = SAM_REPLY_ERROR; > - if (sam_safe_write (parent_fd_out,&reply, sizeof (reply)) != sizeof > (reply)) { > - return (CS_ERR_LIBRARY); > - } > - if (sam_safe_write (parent_fd_out,&err, sizeof (err)) != sizeof (err)) { > - return (CS_ERR_LIBRARY); > - } > - > - return (err); > + return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out)); > } > > static enum sam_parent_action_t sam_parent_handler ( > @@ -749,10 +1067,12 @@ static enum sam_parent_action_t sam_parent_handler ( > struct pollfd pfds[2]; > nfds_t nfds; > cs_error_t err; > + sam_recovery_policy_t recpol; > > status = 0; > > action = SAM_PARENT_ACTION_CONTINUE; > + recpol = sam_internal_data.recovery_policy; > > while (action == 
SAM_PARENT_ACTION_CONTINUE) { > pfds[0].fd = parent_fd_in; > @@ -766,7 +1086,7 @@ static enum sam_parent_action_t sam_parent_handler ( > time_interval = -1; > } > > - if (sam_internal_data.recovery_policy& > SAM_RECOVERY_POLICY_QUORUM) { > + if (recpol& SAM_RECOVERY_POLICY_QUORUM) { > pfds[nfds].fd = sam_internal_data.quorum_fd; > pfds[nfds].events = POLLIN; > pfds[nfds].revents = 0; > @@ -820,6 +1140,10 @@ static enum sam_parent_action_t sam_parent_handler ( > goto action_exit; > } > > + if (recpol& SAM_RECOVERY_POLICY_CONFDB) { > + sam_confdb_update_key > (SAM_CONFDB_KEY_LAST_HC, NULL); > + } > + > /* > * We have read command > */ > @@ -829,13 +1153,20 @@ static enum sam_parent_action_t sam_parent_handler ( > /* > * Not started yet > */ > - if > (sam_internal_data.recovery_policy& SAM_RECOVERY_POLICY_QUORUM) { > + if (recpol& > SAM_RECOVERY_POLICY_QUORUM) { > if > (sam_parent_wait_for_quorum (parent_fd_in, > parent_fd_out) != > CS_OK) { > continue; > } > } > > + if (recpol& > SAM_RECOVERY_POLICY_CONFDB) { > + if > (sam_parent_confdb_state_set (parent_fd_in, > + parent_fd_out, 1) > != CS_OK) { > + continue; > + } > + } > + > status = 1; > } > break; > @@ -844,6 +1175,13 @@ static enum sam_parent_action_t sam_parent_handler ( > /* > * Started > */ > + if (recpol& > SAM_RECOVERY_POLICY_CONFDB) { > + if > (sam_parent_confdb_state_set (parent_fd_in, > + parent_fd_out, 0) > != CS_OK) { > + continue; > + } > + } > + > status = 0; > } > break; > @@ -853,6 +1191,10 @@ static enum sam_parent_action_t sam_parent_handler ( > case SAM_COMMAND_WARN_SIGNAL_SET: > sam_parent_warn_signal_set > (parent_fd_in, parent_fd_out); > break; > + case SAM_COMMAND_MARK_FAILED: > + status = 1; > + sam_parent_mark_child_failed (&action, > child_pid); > + break; > } > } /* if (pfds[0].revents != 0) */ > > @@ -882,13 +1224,25 @@ cs_error_t sam_register ( > pid_t pid; > int pipe_error; > int pipe_fd_out[2], pipe_fd_in[2]; > - enum sam_parent_action_t action; > + enum sam_parent_action_t action, 
old_action; > int child_status; > + sam_recovery_policy_t recpol; > > if (sam_internal_data.internal_status != > SAM_INTERNAL_STATUS_INITIALIZED) { > return (CS_ERR_BAD_HANDLE); > } > > + recpol = sam_internal_data.recovery_policy; > + > + if (recpol& SAM_RECOVERY_POLICY_CONFDB) { > + /* > + * Register to objdb > + */ > + if ((error = sam_confdb_register ()) != CS_OK) { > + goto error_exit; > + } > + } > + > error = CS_OK; > > while (1) { > @@ -905,6 +1259,12 @@ cs_error_t sam_register ( > goto error_exit; > } > > + if (recpol& SAM_RECOVERY_POLICY_CONFDB) { > + if ((error = sam_confdb_update_key > (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_REGISTERED)) != CS_OK) { > + goto error_exit; > + } > + } > + > sam_internal_data.instance_id++; > > sam_internal_data.term_send = 0; > @@ -937,6 +1297,8 @@ cs_error_t sam_register ( > sam_internal_data.am_i_child = 1; > sam_internal_data.internal_status = > SAM_INTERNAL_STATUS_REGISTERED; > > + pthread_mutex_init (&sam_internal_data.lock, NULL); > + > goto error_exit; > } else { > /* > @@ -961,20 +1323,34 @@ cs_error_t sam_register ( > while (waitpid (pid,&child_status, 0) == -1&& errno == > EINTR) > ; > > + old_action = action; > + > if (action == SAM_PARENT_ACTION_RECOVERY) { > - if (sam_internal_data.recovery_policy == > SAM_RECOVERY_POLICY_QUIT || > - sam_internal_data.recovery_policy == > SAM_RECOVERY_POLICY_QUORUM_QUIT) > + if (SAM_RP_MASK > (sam_internal_data.recovery_policy) == SAM_RECOVERY_POLICY_QUIT) > action = SAM_PARENT_ACTION_QUIT; > } > > + > if (action == SAM_PARENT_ACTION_QUIT) { > - if (sam_internal_data.recovery_policy& > SAM_RECOVERY_POLICY_QUORUM) { > + if (recpol& SAM_RECOVERY_POLICY_QUORUM) { > quorum_finalize > (sam_internal_data.quorum_handle); > } > > + if (recpol& SAM_RECOVERY_POLICY_CONFDB) { > + if (old_action == > SAM_PARENT_ACTION_RECOVERY) { > + /* > + * Mark as failed > + */ > + sam_confdb_update_key > (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_FAILED); > + } else { > + sam_confdb_destroy_pid_obj (); > + } > + 
} > + > exit (WEXITSTATUS (child_status)); > } > > + > } > } > > diff --git a/man/Makefile.am b/man/Makefile.am > index 27a12db..58923f0 100644 > --- a/man/Makefile.am > +++ b/man/Makefile.am > @@ -116,6 +116,7 @@ dist_man_MANS = \ > sam_hc_callback_register.3 \ > sam_hc_send.3 \ > sam_initialize.3 \ > + sam_mark_failed.3 \ > sam_overview.8 \ > sam_register.3 \ > sam_start.3 \ > diff --git a/man/sam_initialize.3 b/man/sam_initialize.3 > index 1043954..5a3334f 100644 > --- a/man/sam_initialize.3 > +++ b/man/sam_initialize.3 > @@ -31,7 +31,7 @@ > .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF > .\" * THE POSSIBILITY OF SUCH DAMAGE. > .\" */ > -.TH "SAM_INITIALIZE" 3 "30/04/2010" "corosync Man Page" "Corosync Cluster > Engine Programmer's Manual" > +.TH "SAM_INITIALIZE" 3 "21/05/2010" "corosync Man Page" "Corosync Cluster > Engine Programmer's Manual" > > .SH NAME > .P > @@ -71,6 +71,7 @@ The \fIrecovery_policy\fR is defined as type: > SAM_RECOVERY_POLICY_QUORUM = 0x08, > SAM_RECOVERY_POLICY_QUORUM_QUIT = SAM_RECOVERY_POLICY_QUORUM | > SAM_RECOVERY_POLICY_QUIT, > SAM_RECOVERY_POLICY_QUORUM_RESTART = SAM_RECOVERY_POLICY_QUORUM | > SAM_RECOVERY_POLICY_RESTART, > + SAM_RECOVERY_POLICY_CONFDB = 0x10, > } sam_recovery_policy_t; > .fi > > @@ -94,6 +95,9 @@ quorate and process will be terminated if quorum is lost. > SAM_RECOVERY_POLICY_QUORUM_RESTART > same as \fISAM_RECOVERY_POLICY_RESTART\fR but \fBsam_start (3)\fR will > block until corosync becomes > quorate and process will be restarted if quorum is lost. > +.TP > +SAM_RECOVERY_POLICY_CONFDB > +is not policy. Used only as flag meaning confdb integration. It can be used > with all previous policies. 
> > .P > To perform event driven healthchecking, \fBsam_register(3)\fR and > diff --git a/man/sam_mark_failed.3 b/man/sam_mark_failed.3 > new file mode 100644 > index 0000000..dabe2da > --- /dev/null > +++ b/man/sam_mark_failed.3 > @@ -0,0 +1,73 @@ > +.\"/* > +.\" * Copyright (c) 2010 Red Hat, Inc. > +.\" * > +.\" * All rights reserved. > +.\" * > +.\" * Author: Jan Friesse (jfrie...@redhat.com) > +.\" * > +.\" * This software licensed under BSD license, the text of which follows: > +.\" * > +.\" * Redistribution and use in source and binary forms, with or without > +.\" * modification, are permitted provided that the following conditions are > met: > +.\" * > +.\" * - Redistributions of source code must retain the above copyright > notice, > +.\" * this list of conditions and the following disclaimer. > +.\" * - Redistributions in binary form must reproduce the above copyright > notice, > +.\" * this list of conditions and the following disclaimer in the > documentation > +.\" * and/or other materials provided with the distribution. > +.\" * - Neither the name of the Red Hat, Inc. nor the names of its > +.\" * contributors may be used to endorse or promote products derived from > this > +.\" * software without specific prior written permission. > +.\" * > +.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > "AS IS" > +.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, > THE > +.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR > PURPOSE > +.\" * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS > BE > +.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR > +.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF > +.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR > BUSINESS > +.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN > +.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) > +.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF > +.\" * THE POSSIBILITY OF SUCH DAMAGE. > +.\" */ > +.TH "SAM_MARK_FAILED" 3 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine > Programmer's Manual" > + > +.SH NAME > +.P > +sam_mark_failed \- Mark process failed > + > +.SH SYNOPSIS > +.P > +\fB#include<corosync/sam.h>\fR > + > +.P > +\fBcs_error_t sam_mark_failed (void);\fR > + > +.SH DESCRIPTION > +.P > +The \fBsam_mark_failed\fR function is used with SAM_RECOVERY_POLICY_CONFDB > mostly > +together with SAM_RECOVERY_POLICY_RESTART to mark process failed. Process > marked > +failed is killed without sending warn signal and control process will exit > +as with SAM_RECOVERY_POLICY_QUIT policy. Confdb key state will be set to > failed so > +corosync watchdog can take required action. > + > +.SH RETURN VALUE > +.P > +This call returns CS_OK value if successful, otherwise an error is returned. 
> + > +.SH ERRORS > +.TP > +CS_ERR_BAD_HANDLE > +library was not initialized by calling \fBsam_initialize(3)\fR or was > already finalized > + > +.TP > +CS_ERR_INVALID_PARAM > +recovery policy doesn't have SAM_RECOVERY_POLICY_CONFDB flag set > + > +.TP > +CS_ERR_LIBRARY > +some internal error appeared (communication with parent process) > + > +.SH "SEE ALSO" > +.BR sam_initialize (3) > diff --git a/man/sam_overview.8 b/man/sam_overview.8 > index d521a8a..a5807cf 100644 > --- a/man/sam_overview.8 > +++ b/man/sam_overview.8 > @@ -32,7 +32,7 @@ > .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF > .\" * THE POSSIBILITY OF SUCH DAMAGE. > .\" */ > -.TH "SAM_OVERVIEW" 8 "30/04/2010" "corosync Man Page" "Corosync Cluster > Engine Programmer's Manual" > +.TH "SAM_OVERVIEW" 8 "21/05/2010" "corosync Man Page" "Corosync Cluster > Engine Programmer's Manual" > > .SH NAME > .P > @@ -133,6 +133,38 @@ One can in such case use files, databases, ... or much > simpler in memory solutio > presented by \fBsam_data_store(3)\fR, \fBsam_data_restore(3)\fR and > \fBsam_data_getsize(3)\fR > functions. > > +.SH Confdb integration > +.P > +SAM has policy flag used for confdb system integration > (\fISAM_RECOVERY_POLICY_CONFDB\fR). > +If process is registered with this flag, new confdb object PROCESS_NAME:PID > is created with following > +keys: > +.RS > +.IP \(bu 3 > +\fIrecovery\fR - will be quit or restart depending on policy > +.IP \(bu 3 > +\fIhc_period\fR - period of health checking in milliseconds > +.IP \(bu 3 > +\fIhc_last\fR - last known GMT time in milliseconds when health check was > received > +.IP \(bu 3 > +\fIstate\fR - state of process (can be one of registered, started, failed, > waiting for quorum) > +.RE > + > +.P > +Object is automatically deleted if process exits with stopped health > checking. > + > +.P > +Confdb integration with corosync watchdog can be used in implicit and > explicit way. 
> + > +.P > +Implicit way is achieved by setting recovery policy to QUIT and let process > exit with started health checking. > +If this happened, object is not deleted and corosync watchdog will take > required action. > + > +.P > +Explicit way is useful for situations, when developer can deal with some > non-fatal failure of application. > +This mode is achieved by setting policy to RESTART and using SAM same as > without Confdb integration. > +If real failure is needed (like too many restarts at all, per/sec, ...), it's > possible to use \fBsam_mark_failed(3)\fR > +and let corosync watchdog take required action. > + > .SH BUGS > .SH "SEE ALSO" > .BR sam_initialize (3), > @@ -140,6 +172,7 @@ functions. > .BR sam_data_restore (3), > .BR sam_data_store (3), > .BR sam_finalize (3), > +.BR sam_mark_failed (3), > .BR sam_start (3), > .BR sam_stop (3), > .BR sam_register (3), > diff --git a/test/testsam.c b/test/testsam.c > index 95d8e12..1972d9e 100644 > --- a/test/testsam.c > +++ b/test/testsam.c > @@ -38,6 +38,7 @@ > > #include<config.h> > > +#include<limits.h> > #include<sys/types.h> > #include<stdio.h> > #include<stdint.h> > @@ -50,6 +51,8 @@ > #include<string.h> > #include<sys/wait.h> > > +extern const char *__progname; > + > static int test2_sig_delivered = 0; > static int test5_hc_cb_count = 0; > static int test6_sig_delivered = 0; > @@ -864,9 +867,551 @@ static int test7 (void) { > return (2); > } > > +/* > + * Test confdb integration + quit policy > + */ > +static int test8 (pid_t pid, pid_t old_pid, int test_n) { > + confdb_handle_t cdb_handle; > + cs_error_t err; > + hdb_handle_t res_handle, proc_handle, pid_handle; > + size_t value_len; > + uint64_t tstamp1, tstamp2; > + char key_value[256]; > + unsigned int instance_id; > + char tmp_obj[PATH_MAX]; > + confdb_value_types_t cdbtype; > + > + err = confdb_initialize (&cdb_handle, NULL); > + if (err != CS_OK) { > + printf ("Could not initialize Cluster Configuration Database > API instance error %d. 
Test skipped\n", err); > + return (1); > + } > + > + printf ("%s test %d\n", __FUNCTION__, test_n); > + > + if (test_n == 2) { > + /* > + * Object should not exist > + */ > + printf ("%s Testing if object exists (it shouldn't)\n", > __FUNCTION__); > + > + err = confdb_object_find_start(cdb_handle, > OBJECT_PARENT_HANDLE); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, > "resources", strlen("resources"),&res_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"resources\": %d.\n", > err); > + return (2); > + } > + > + err = confdb_object_find_start(cdb_handle, res_handle); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, res_handle, "process", > strlen("process"),&proc_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"process\": %d.\n", > err); > + return (2); > + } > + > + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, > pid)>= sizeof (tmp_obj)) { > + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); > + } > + > + err = confdb_object_find_start(cdb_handle, proc_handle); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, > strlen(tmp_obj),&pid_handle); > + if (err == CS_OK) { > + printf ("Could find object \"%s\": %d.\n", tmp_obj, > err); > + return (2); > + } > + } > + > + if (test_n == 1 || test_n == 2) { > + printf ("%s: initialize\n", __FUNCTION__); > + err = sam_initialize (2000, SAM_RECOVERY_POLICY_QUIT | > SAM_RECOVERY_POLICY_CONFDB); > + if (err != CS_OK) { > + fprintf (stderr, "Can't initialize SAM API. 
Error > %d\n", err); > + return 2; > + } > + > + printf ("%s: register\n", __FUNCTION__); > + err = sam_register (&instance_id); > + if (err != CS_OK) { > + fprintf (stderr, "Can't register. Error %d\n", err); > + return 2; > + } > + > + err = confdb_object_find_start(cdb_handle, > OBJECT_PARENT_HANDLE); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, > "resources", strlen("resources"),&res_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"resources\": %d.\n", > err); > + return (2); > + } > + > + err = confdb_object_find_start(cdb_handle, res_handle); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, res_handle, "process", > strlen("process"),&proc_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"process\": %d.\n", > err); > + return (2); > + } > + > + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, > pid)>= sizeof (tmp_obj)) { > + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); > + } > + > + err = confdb_object_find_start(cdb_handle, proc_handle); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, > strlen(tmp_obj),&pid_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"%s\": %d.\n", tmp_obj, > err); > + return (2); > + } > + > + err = confdb_key_get(cdb_handle, pid_handle, "recovery", > strlen("recovery"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"recovery\" key: %d.\n", err); > + return (2); > + } > + > + if (value_len != strlen ("quit") || memcmp (key_value, "quit", > value_len) != 0) { > + printf ("Recovery key \"%s\" is not \"watchdog\".\n", > key_value); > + return (2); > + } > + > + err = confdb_key_get(cdb_handle, 
pid_handle, "state", > strlen("state"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", err); > + return (2); > + } > + > + if (value_len != strlen ("registered") || memcmp (key_value, > "registered", value_len) != 0) { > + printf ("State key is not \"registered\".\n"); > + return (2); > + } > + > + printf ("%s iid %d: start\n", __FUNCTION__, instance_id); > + err = sam_start (); > + if (err != CS_OK) { > + fprintf (stderr, "Can't start hc. Error %d\n", err); > + return 2; > + } > + > + err = confdb_key_get(cdb_handle, pid_handle, "state", > strlen("state"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", err); > + return (2); > + } > + > + if (value_len != strlen ("started") || memcmp (key_value, > "started", value_len) != 0) { > + printf ("State key is not \"started\".\n"); > + return (2); > + } > + > + printf ("%s iid %d: stop\n", __FUNCTION__, instance_id); > + err = sam_stop (); > + if (err != CS_OK) { > + fprintf (stderr, "Can't stop hc. 
Error %d\n", err); > + return 2; > + } > + > + err = confdb_key_get(cdb_handle, pid_handle, "state", > strlen("state"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", err); > + return (2); > + } > + > + if (value_len != strlen ("registered") || memcmp (key_value, > "registered", value_len) != 0) { > + printf ("State key is not \"registered\".\n"); > + return (2); > + } > + > + printf ("%s iid %d: sleeping 5\n", __FUNCTION__, instance_id); > + sleep (5); > + > + err = confdb_key_get(cdb_handle, pid_handle, "state", > strlen("state"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", err); > + return (2); > + } > + > + if (value_len != strlen ("registered") || memcmp (key_value, > "registered", value_len) != 0) { > + printf ("State key is not \"registered\".\n"); > + return (2); > + } > + > + printf ("%s iid %d: start 2\n", __FUNCTION__, instance_id); > + err = sam_start (); > + if (err != CS_OK) { > + fprintf (stderr, "Can't start hc. Error %d\n", err); > + return 2; > + } > + > + err = confdb_key_get(cdb_handle, pid_handle, "state", > strlen("state"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", err); > + return (2); > + } > + > + if (value_len != strlen ("started") || memcmp (key_value, > "started", value_len) != 0) { > + printf ("State key is not \"started\".\n"); > + return (2); > + } > + > + if (test_n == 2) { > + printf ("%s iid %d: sleeping 5. Should be killed\n", > __FUNCTION__, instance_id); > + sleep (5); > + > + return (2); > + } else { > + printf ("%s iid %d: Test HC\n", __FUNCTION__, > instance_id); > + err = sam_hc_send (); > + if (err != CS_OK) { > + fprintf (stderr, "Can't send hc. 
Error %d\n", > err); > + return 2; > + } > + err = confdb_key_get_typed (cdb_handle, pid_handle, > "hc_last",&tstamp1,&value_len,&cdbtype); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", > err); > + return (2); > + } > + printf ("%s iid %d: Sleep 1\n", __FUNCTION__, > instance_id); > + sleep (1); > + err = sam_hc_send (); > + if (err != CS_OK) { > + fprintf (stderr, "Can't send hc. Error %d\n", > err); > + return 2; > + } > + sleep (1); > + err = confdb_key_get_typed (cdb_handle, pid_handle, > "hc_last",&tstamp2,&value_len,&cdbtype); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", > err); > + return (2); > + } > + if (tstamp2 - tstamp1< 500 || tstamp2 - tstamp1> > 2000) { > + printf ("Difference %d is not within<500, 2000> > interval.\n", (int)(tstamp2 - tstamp1)); > + return (2); > + } > + > + printf ("%s iid %d: stop 2\n", __FUNCTION__, > instance_id); > + err = sam_stop (); > + if (err != CS_OK) { > + fprintf (stderr, "Can't stop hc. 
Error %d\n", > err); > + return 2; > + } > + > + err = confdb_key_get(cdb_handle, pid_handle, "state", > strlen("state"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", > err); > + return (2); > + } > + > + if (value_len != strlen ("registered") || memcmp > (key_value, "registered", value_len) != 0) { > + printf ("State key is not \"registered\".\n"); > + return (2); > + } > + > + printf ("%s iid %d: exiting\n", __FUNCTION__, > instance_id); > + return (0); > + } > + } > + > + if (test_n == 3) { > + printf ("%s Testing if status is failed\n", __FUNCTION__); > + > + /* > + * Previous should be FAILED > + */ > + err = confdb_object_find_start(cdb_handle, > OBJECT_PARENT_HANDLE); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, > "resources", strlen("resources"),&res_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"resources\": %d.\n", > err); > + return (2); > + } > + > + err = confdb_object_find_start(cdb_handle, res_handle); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, res_handle, "process", > strlen("process"),&proc_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"process\": %d.\n", > err); > + return (2); > + } > + > + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, > pid)>= sizeof (tmp_obj)) { > + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); > + } > + > + err = confdb_object_find_start(cdb_handle, proc_handle); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, > strlen(tmp_obj),&pid_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"%s\": %d.\n", tmp_obj, > err); > + return (2); > + } > + > + err = 
confdb_key_get(cdb_handle, pid_handle, "state", > strlen("state"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", err); > + return (2); > + } > + > + if (value_len != strlen ("failed") || memcmp (key_value, > "failed", value_len) != 0) { > + printf ("State key is not \"failed\".\n"); > + return (2); > + } > + > + return (0); > + } > + > + return (2); > +} > + > +/* > + * Test confdb integration + restart policy > + */ > +static int test9 (pid_t pid, pid_t old_pid, int test_n) { > + confdb_handle_t cdb_handle; > + cs_error_t err; > + hdb_handle_t res_handle, proc_handle, pid_handle; > + size_t value_len; > + char key_value[256]; > + unsigned int instance_id; > + char tmp_obj[PATH_MAX]; > + > + err = confdb_initialize (&cdb_handle, NULL); > + if (err != CS_OK) { > + printf ("Could not initialize Cluster Configuration Database > API instance error %d. Test skipped\n", err); > + return (1); > + } > + > + printf ("%s test %d\n", __FUNCTION__, test_n); > + > + if (test_n == 1) { > + printf ("%s: initialize\n", __FUNCTION__); > + err = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART | > SAM_RECOVERY_POLICY_CONFDB); > + if (err != CS_OK) { > + fprintf (stderr, "Can't initialize SAM API. Error > %d\n", err); > + return 2; > + } > + > + printf ("%s: register\n", __FUNCTION__); > + err = sam_register (&instance_id); > + if (err != CS_OK) { > + fprintf (stderr, "Can't register. 
Error %d\n", err); > + return 2; > + } > + printf ("%s: iid %d\n", __FUNCTION__, instance_id); > + > + if (instance_id< 3) { > + err = confdb_object_find_start(cdb_handle, > OBJECT_PARENT_HANDLE); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", > err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, > OBJECT_PARENT_HANDLE, "resources", strlen("resources"), > + &res_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"resources\": > %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find_start(cdb_handle, res_handle); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", > err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, res_handle, > "process", strlen("process"),&proc_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"process\": > %d.\n", err); > + return (2); > + } > + > + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", > __progname, pid)>= sizeof (tmp_obj)) { > + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); > + } > + > + err = confdb_object_find_start(cdb_handle, proc_handle); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", > err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, proc_handle, > tmp_obj, strlen(tmp_obj),&pid_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"%s\": %d.\n", > tmp_obj, err); > + return (2); > + } > + > + err = confdb_key_get(cdb_handle, pid_handle, > "recovery", strlen("recovery"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"recovery\" key: > %d.\n", err); > + return (2); > + } > + > + if (value_len != strlen ("restart") || memcmp > (key_value, "restart", value_len) != 0) { > + printf ("Recovery key \"%s\" is not > \"restart\".\n", key_value); > + return (2); > + } > + > + err = confdb_key_get(cdb_handle, pid_handle, "state", > strlen("state"), key_value,&value_len); > + if (err != CS_OK) { > + printf 
("Could not get \"state\" key: %d.\n", > err); > + return (2); > + } > + > + if (value_len != strlen ("registered") || memcmp > (key_value, "registered", value_len) != 0) { > + printf ("State key is not \"registered\".\n"); > + return (2); > + } > + > + printf ("%s iid %d: start\n", __FUNCTION__, > instance_id); > + err = sam_start (); > + if (err != CS_OK) { > + fprintf (stderr, "Can't start hc. Error %d\n", > err); > + return 2; > + } > + > + err = confdb_key_get(cdb_handle, pid_handle, "state", > strlen("state"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", > err); > + return (2); > + } > + > + if (value_len != strlen ("started") || memcmp > (key_value, "started", value_len) != 0) { > + printf ("State key is not \"started\".\n"); > + return (2); > + } > + > + printf ("%s iid %d: waiting for kill\n", __FUNCTION__, > instance_id); > + sleep (10); > + > + return (2); > + } > + > + if (instance_id == 3) { > + printf ("%s iid %d: mark failed\n", __FUNCTION__, > instance_id); > + if (err != CS_OK) { > + fprintf (stderr, "Can't start hc. Error %d\n", > err); > + return 2; > + } > + err = sam_mark_failed (); > + if (err != CS_OK) { > + fprintf (stderr, "Can't mark failed. 
Error > %d\n", err); > + return 2; > + } > + > + sleep (10); > + > + return (2); > + } > + > + return (2); > + } > + > + if (test_n == 2) { > + printf ("%s Testing if status is failed\n", __FUNCTION__); > + > + /* > + * Previous should be FAILED > + */ > + err = confdb_object_find_start(cdb_handle, > OBJECT_PARENT_HANDLE); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, > "resources", strlen("resources"),&res_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"resources\": %d.\n", > err); > + return (2); > + } > + > + err = confdb_object_find_start(cdb_handle, res_handle); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, res_handle, "process", > strlen("process"),&proc_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"process\": %d.\n", > err); > + return (2); > + } > + > + if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, > pid)>= sizeof (tmp_obj)) { > + snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid); > + } > + > + err = confdb_object_find_start(cdb_handle, proc_handle); > + if (err != CS_OK) { > + printf ("Could not start object_find %d.\n", err); > + return (2); > + } > + > + err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, > strlen(tmp_obj),&pid_handle); > + if (err != CS_OK) { > + printf ("Could not object_find \"%s\": %d.\n", tmp_obj, > err); > + return (2); > + } > + > + err = confdb_key_get(cdb_handle, pid_handle, "state", > strlen("state"), key_value,&value_len); > + if (err != CS_OK) { > + printf ("Could not get \"state\" key: %d.\n", err); > + return (2); > + } > + > + if (value_len != strlen ("failed") || memcmp (key_value, > "failed", value_len) != 0) { > + printf ("State key is not \"failed\".\n"); > + return (2); > + } > + > + return (0); > + } > + > + return (2); > +} > + > 
int main(int argc, char *argv[]) > { > - pid_t pid; > + pid_t pid, old_pid; > int err; > int stat; > int all_passed = 1; > @@ -990,7 +1535,7 @@ int main(int argc, char *argv[]) > > if (pid == -1) { > fprintf (stderr, "Can't fork\n"); > - return 1; > + return 2; > } > > if (pid == 0) { > @@ -1003,6 +1548,100 @@ int main(int argc, char *argv[]) > fprintf (stderr, "test7 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : > (WEXITSTATUS (stat) == 1 ? "skipped" : "failed"))); > if (WEXITSTATUS (stat) == 1) > no_skipped++; > + if (WEXITSTATUS (stat)> 1) > + all_passed = 0; > + > + pid = fork (); > + > + if (pid == -1) { > + fprintf (stderr, "Can't fork\n"); > + return 2; > + } > + > + if (pid == 0) { > + err = test8 (getpid (), 0, 1); > + sam_finalize (); > + return (err); > + } > + > + waitpid (pid,&stat, 0); > + old_pid = pid; > + > + if (WEXITSTATUS (stat) == 0) { > + pid = fork (); > + > + if (pid == -1) { > + fprintf (stderr, "Can't fork\n"); > + return 2; > + } > + > + if (pid == 0) { > + err = test8 (getpid (), old_pid, 2); > + sam_finalize (); > + return (err); > + } > + > + waitpid (pid,&stat, 0); > + old_pid = pid; > + > + if (WEXITSTATUS (stat) == 0) { > + pid = fork (); > + > + if (pid == -1) { > + fprintf (stderr, "Can't fork\n"); > + return 2; > + } > + > + if (pid == 0) { > + err = test8 (old_pid, 0, 3); > + sam_finalize (); > + return (err); > + } > + > + waitpid (pid,&stat, 0); > + } > + } > + > + if (WEXITSTATUS (stat) == 1) > + no_skipped++; > + if (WEXITSTATUS (stat)> 1) > + all_passed = 0; > + > + pid = fork (); > + > + if (pid == -1) { > + fprintf (stderr, "Can't fork\n"); > + return 2; > + } > + > + if (pid == 0) { > + err = test9 (getpid (), 0, 1); > + sam_finalize (); > + return (err); > + } > + > + waitpid (pid,&stat, 0); > + old_pid = pid; > + > + if (WEXITSTATUS (stat) == 0) { > + pid = fork (); > + > + if (pid == -1) { > + fprintf (stderr, "Can't fork\n"); > + return 2; > + } > + > + if (pid == 0) { > + err = test9 (old_pid, 0, 2); > + sam_finalize 
(); > + return (err); > + } > + > + waitpid (pid,&stat, 0); > + } > + fprintf (stderr, "test9 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : > (WEXITSTATUS (stat) == 1 ? "skipped" : "failed"))); > + if (WEXITSTATUS (stat) == 1) > + no_skipped++; > > if (WEXITSTATUS (stat)> 1) > all_passed = 0; Good for merge
_______________________________________________ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais