This patch automatically recovers redundant ring failures. Please note that this patch introduces rrp_autorecovery_check_timeout in the totem config and hence breaks the internal ABI, so internal ABI users of totem.h need to rebuild their binaries.
Signed-off-by: Jiaju Zhang <jjzh...@suse.de> Signed-off-by: Steven Dake <sd...@redhat.com> Tested-by: Jan Friesse <jfrie...@redhat.com> Tested-by: Florian Haas <florian.h...@linbit.com> Tested-by: Jiaju Zhang <jjzh...@suse.de> --- exec/totemconfig.c | 7 + exec/totemrrp.c | 242 +++++++++++++++++++++++++++++++++------- exec/totemrrp.h | 3 +- exec/totemsrp.c | 6 +- include/corosync/totem/totem.h | 2 + man/corosync.conf.5 | 7 + 6 files changed, 226 insertions(+), 41 deletions(-) diff --git a/exec/totemconfig.c b/exec/totemconfig.c index b2c6f43..5135672 100644 --- a/exec/totemconfig.c +++ b/exec/totemconfig.c @@ -83,6 +83,7 @@ #define RRP_PROBLEM_COUNT_TIMEOUT 2000 #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT 10 #define RRP_PROBLEM_COUNT_THRESHOLD_MIN 5 +#define RRP_AUTORECOVERY_CHECK_TIMEOUT 1000 static char error_string_response[512]; static struct objdb_iface_ver0 *global_objdb; @@ -212,6 +213,8 @@ static void totem_volatile_config_read ( objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold); + objdb_get_int (objdb,object_totem_handle, "rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout); + objdb_get_int (objdb,object_totem_handle, "heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed); objdb_get_int (objdb,object_totem_handle, "max_network_delay", &totem_config->max_network_delay); @@ -682,6 +685,10 @@ int totem_config_validate ( goto parse_error; } + if (totem_config->rrp_autorecovery_check_timeout == 0) { + totem_config->rrp_autorecovery_check_timeout = RRP_AUTORECOVERY_CHECK_TIMEOUT; + } + if (strcmp (totem_config->rrp_mode, "none") == 0) { interface_max = 1; } diff --git a/exec/totemrrp.c b/exec/totemrrp.c index 8107a1c..6fb5772 100644 --- a/exec/totemrrp.c +++ b/exec/totemrrp.c @@ -159,7 +159,8 @@ struct rrp_algo { unsigned int iface_no); void (*ring_reenable) ( - struct totemrrp_instance *instance); + struct totemrrp_instance *instance, + unsigned int 
iface_no); int (*mcast_recv_empty) ( struct totemrrp_instance *instance); @@ -237,7 +238,13 @@ struct totemrrp_instance { int processor_count; + int my_nodeid; + struct totem_config *totem_config; + + void *deliver_fn_context[INTERFACE_MAX]; + + poll_timer_handle timer_active_test_ring_timeout[INTERFACE_MAX]; }; /* @@ -289,7 +296,8 @@ static void none_token_target_set ( unsigned int iface_no); static void none_ring_reenable ( - struct totemrrp_instance *instance); + struct totemrrp_instance *instance, + unsigned int iface_no); static int none_mcast_recv_empty ( struct totemrrp_instance *instance); @@ -356,7 +364,8 @@ static void passive_token_target_set ( unsigned int iface_no); static void passive_ring_reenable ( - struct totemrrp_instance *instance); + struct totemrrp_instance *instance, + unsigned int iface_no); static int passive_mcast_recv_empty ( struct totemrrp_instance *instance); @@ -423,7 +432,8 @@ static void active_token_target_set ( unsigned int iface_no); static void active_ring_reenable ( - struct totemrrp_instance *instance); + struct totemrrp_instance *instance, + unsigned int iface_no); static int active_mcast_recv_empty ( struct totemrrp_instance *instance); @@ -450,6 +460,28 @@ static void active_timer_problem_decrementer_start ( static void active_timer_problem_decrementer_cancel ( struct active_instance *active_instance); +/* + * 0-5 reserved for totemsrp.c + */ +#define MESSAGE_TYPE_RING_TEST_ACTIVE 6 +#define MESSAGE_TYPE_RING_TEST_ACTIVATE 7 + +#define ENDIAN_LOCAL 0xff22 + +struct message_header { + char type; + char encapsulated; + unsigned short endian_detector; + int ring_number; + int nodeid_activator; +} __attribute__((packed)); + +struct deliver_fn_context { + struct totemrrp_instance *instance; + void *context; + int iface_no; +}; + struct rrp_algo none_algo = { .name = "none", .initialize = NULL, @@ -522,6 +554,47 @@ do { \ format, ##args); \ } while (0); +static void test_active_msg_endian_convert(const struct message_header *in, 
struct message_header *out) +{ + out->type = in->type; + out->encapsulated = in->encapsulated; + out->endian_detector = ENDIAN_LOCAL; + out->ring_number = swab32 (in->ring_number); + out->nodeid_activator = swab32(in->nodeid_activator); +} + +static void timer_function_test_ring_timeout (void *context) +{ + struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context; + struct totemrrp_instance *rrp_instance = deliver_fn_context->instance; + unsigned int *faulty = NULL; + int iface_no = deliver_fn_context->iface_no; + struct message_header msg = { + .type = MESSAGE_TYPE_RING_TEST_ACTIVE, + .endian_detector = ENDIAN_LOCAL, + }; + + if (strcmp(rrp_instance->totem_config->rrp_mode, "active") == 0) + faulty = ((struct active_instance *)(rrp_instance->rrp_algo_instance))->faulty; + if (strcmp(rrp_instance->totem_config->rrp_mode, "passive") == 0) + faulty = ((struct passive_instance *)(rrp_instance->rrp_algo_instance))->faulty; + + assert (faulty != NULL); + + if (faulty[iface_no] == 1) { + msg.ring_number = iface_no; + msg.nodeid_activator = rrp_instance->my_nodeid; + totemnet_token_send ( + rrp_instance->net_handles[iface_no], + &msg, sizeof (struct message_header)); + poll_timer_add (rrp_instance->poll_handle, + rrp_instance->totem_config->rrp_autorecovery_check_timeout, + (void *)deliver_fn_context, + timer_function_test_ring_timeout, + &rrp_instance->timer_active_test_ring_timeout[iface_no]); + } +} + /* * None Replication Implementation */ @@ -606,7 +679,8 @@ static void none_token_target_set ( } static void none_ring_reenable ( - struct totemrrp_instance *instance) + struct totemrrp_instance *instance, + unsigned int iface_no) { /* * No operation @@ -797,8 +871,14 @@ static void passive_mcast_recv ( (max - passive_instance->mcast_recv_count[i] > rrp_instance->totem_config->rrp_problem_count_threshold)) { passive_instance->faulty[i] = 1; + poll_timer_add (rrp_instance->poll_handle, + 
rrp_instance->totem_config->rrp_autorecovery_check_timeout, + rrp_instance->deliver_fn_context[i], + timer_function_test_ring_timeout, + &rrp_instance->timer_active_test_ring_timeout[i]); + sprintf (rrp_instance->status[i], - "Marking ringid %u interface %s FAULTY - administrative intervention required.", + "Marking ringid %u interface %s FAULTY", i, totemnet_iface_print (rrp_instance->net_handles[i])); log_printf ( @@ -880,8 +960,14 @@ static void passive_token_recv ( (max - passive_instance->token_recv_count[i] > rrp_instance->totem_config->rrp_problem_count_threshold)) { passive_instance->faulty[i] = 1; + poll_timer_add (rrp_instance->poll_handle, + rrp_instance->totem_config->rrp_autorecovery_check_timeout, + rrp_instance->deliver_fn_context[i], + timer_function_test_ring_timeout, + &rrp_instance->timer_active_test_ring_timeout[i]); + sprintf (rrp_instance->status[i], - "Marking seqid %d ringid %u interface %s FAULTY - administrative intervention required.", + "Marking seqid %d ringid %u interface %s FAULTY", token_seq, i, totemnet_iface_print (rrp_instance->net_handles[i])); @@ -1002,7 +1088,8 @@ static int passive_member_remove ( static void passive_ring_reenable ( - struct totemrrp_instance *instance) + struct totemrrp_instance *instance, + unsigned int iface_no) { struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance; @@ -1010,8 +1097,13 @@ static void passive_ring_reenable ( instance->interface_count); memset (rrp_algo_instance->token_recv_count, 0, sizeof (unsigned int) * instance->interface_count); - memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) * - instance->interface_count); + + if (iface_no == instance->interface_count) { + memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) * + instance->interface_count); + } else { + rrp_algo_instance->faulty[iface_no] = 0; + } } /* @@ -1128,8 +1220,14 @@ static void timer_function_active_token_expired (void *context) if 
(active_instance->counter_problems[i] >= rrp_instance->totem_config->rrp_problem_count_threshold) { active_instance->faulty[i] = 1; + poll_timer_add (rrp_instance->poll_handle, + rrp_instance->totem_config->rrp_autorecovery_check_timeout, + rrp_instance->deliver_fn_context[i], + timer_function_test_ring_timeout, + &rrp_instance->timer_active_test_ring_timeout[i]); + sprintf (rrp_instance->status[i], - "Marking seqid %d ringid %u interface %s FAULTY - administrative intervention required.", + "Marking seqid %d ringid %u interface %s FAULTY", active_instance->last_token_seq, i, totemnet_iface_print (rrp_instance->net_handles[i])); @@ -1233,7 +1331,7 @@ static void active_mcast_noflush_send ( } static void active_token_recv ( - struct totemrrp_instance *instance, + struct totemrrp_instance *rrp_instance, unsigned int iface_no, void *context, const void *msg, @@ -1241,13 +1339,13 @@ static void active_token_recv ( unsigned int token_seq) { int i; - struct active_instance *active_instance = (struct active_instance *)instance->rrp_algo_instance; + struct active_instance *active_instance = (struct active_instance *)rrp_instance->rrp_algo_instance; - active_instance->totemrrp_context = context; // this should be in totemrrp_instance ? 
+ active_instance->totemrrp_context = context; if (token_seq > active_instance->last_token_seq) { memcpy (active_instance->token, msg, msg_len); active_instance->token_len = msg_len; - for (i = 0; i < instance->interface_count; i++) { + for (i = 0; i < rrp_instance->interface_count; i++) { active_instance->last_token_recv[i] = 0; } @@ -1259,7 +1357,7 @@ static void active_token_recv ( if (token_seq == active_instance->last_token_seq) { active_instance->last_token_recv[iface_no] = 1; - for (i = 0; i < instance->interface_count; i++) { + for (i = 0; i < rrp_instance->interface_count; i++) { if ((active_instance->last_token_recv[i] == 0) && active_instance->faulty[i] == 0) { return; /* don't deliver token */ @@ -1267,7 +1365,7 @@ static void active_token_recv ( } active_timer_expired_token_cancel (active_instance); - instance->totemrrp_deliver_fn ( + rrp_instance->totemrrp_deliver_fn ( context, msg, msg_len); @@ -1383,24 +1481,25 @@ static int active_mcast_recv_empty ( } static void active_ring_reenable ( - struct totemrrp_instance *instance) + struct totemrrp_instance *instance, + unsigned int iface_no) { struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; - memset (rrp_algo_instance->last_token_recv, 0, sizeof (unsigned int) * - instance->interface_count); - memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) * - instance->interface_count); - memset (rrp_algo_instance->counter_problems, 0, sizeof (unsigned int) * - instance->interface_count); + if (iface_no == instance->interface_count) { + memset (rrp_algo_instance->last_token_recv, 0, sizeof (unsigned int) * + instance->interface_count); + memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) * + instance->interface_count); + memset (rrp_algo_instance->counter_problems, 0, sizeof (unsigned int) * + instance->interface_count); + } else { + rrp_algo_instance->last_token_recv[iface_no] = 0; + rrp_algo_instance->faulty[iface_no] = 0; + 
rrp_algo_instance->counter_problems[iface_no] = 0; + } } -struct deliver_fn_context { - struct totemrrp_instance *instance; - void *context; - int iface_no; -}; - static void totemrrp_instance_initialize (struct totemrrp_instance *instance) { memset (instance, 0, sizeof (struct totemrrp_instance)); @@ -1441,18 +1540,71 @@ void rrp_deliver_fn ( unsigned int token_is; struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context; + struct totemrrp_instance *rrp_instance = deliver_fn_context->instance; + const struct message_header *hdr = msg; + struct message_header tmp_msg, activate_msg; - deliver_fn_context->instance->totemrrp_token_seqid_get ( + memset(&tmp_msg, 0, sizeof(struct message_header)); + memset(&activate_msg, 0, sizeof(struct message_header)); + + rrp_instance->totemrrp_token_seqid_get ( msg, &token_seqid, &token_is); + if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVE) { + log_printf ( + rrp_instance->totemrrp_log_level_debug, + "received message requesting test of ring now active\n"); + + if (hdr->endian_detector != ENDIAN_LOCAL) { + test_active_msg_endian_convert(hdr, &tmp_msg); + hdr = &tmp_msg; + } + + if (hdr->nodeid_activator == rrp_instance->my_nodeid) { + /* + * Send an activate message + */ + activate_msg.type = MESSAGE_TYPE_RING_TEST_ACTIVATE; + activate_msg.endian_detector = ENDIAN_LOCAL; + activate_msg.ring_number = hdr->ring_number; + activate_msg.nodeid_activator = rrp_instance->my_nodeid; + totemnet_token_send ( + rrp_instance->net_handles[deliver_fn_context->iface_no], + &activate_msg, sizeof (struct message_header)); + } else { + /* + * Send a ring test message + */ + totemnet_token_send ( + rrp_instance->net_handles[deliver_fn_context->iface_no], + msg, msg_len); + } + } else + if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVATE) { + log_printf ( + rrp_instance->totemrrp_log_level_notice, + "Automatically recovered ring %d\n", hdr->ring_number); + + if (hdr->endian_detector != ENDIAN_LOCAL) { + 
test_active_msg_endian_convert(hdr, &tmp_msg); + hdr = &tmp_msg; + } + + totemrrp_ring_reenable (rrp_instance, deliver_fn_context->iface_no); + if (hdr->nodeid_activator != rrp_instance->my_nodeid) { + totemnet_token_send ( + rrp_instance->net_handles[deliver_fn_context->iface_no], + msg, msg_len); + } + } else if (token_is) { /* * Deliver to the token receiver for this rrp algorithm */ - deliver_fn_context->instance->rrp_algo->token_recv ( - deliver_fn_context->instance, + rrp_instance->rrp_algo->token_recv ( + rrp_instance, deliver_fn_context->iface_no, deliver_fn_context->context, msg, @@ -1462,8 +1614,8 @@ void rrp_deliver_fn ( /* * Deliver to the mcast receiver for this rrp algorithm */ - deliver_fn_context->instance->rrp_algo->mcast_recv ( - deliver_fn_context->instance, + rrp_instance->rrp_algo->mcast_recv ( + rrp_instance, deliver_fn_context->iface_no, deliver_fn_context->context, msg, @@ -1477,6 +1629,7 @@ void rrp_iface_change_fn ( { struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context; + deliver_fn_context->instance->my_nodeid = iface_addr->nodeid; deliver_fn_context->instance->totemrrp_iface_change_fn ( deliver_fn_context->context, iface_addr, @@ -1597,6 +1750,7 @@ int totemrrp_initialize ( deliver_fn_context->instance = instance; deliver_fn_context->context = context; deliver_fn_context->iface_no = i; + instance->deliver_fn_context[i] = (void *)deliver_fn_context; totemnet_initialize ( poll_handle, @@ -1746,17 +1900,27 @@ int totemrrp_crypto_set ( } +/* + * iface_no indicates the interface number [0, ..., interface_count-1] of the + * specific ring which will be reenabled. We specify iface_no == interface_count + * means reenabling all the rings. 
+ */ int totemrrp_ring_reenable ( - void *rrp_context) + void *rrp_context, + unsigned int iface_no) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; int res = 0; unsigned int i; - instance->rrp_algo->ring_reenable (instance); + instance->rrp_algo->ring_reenable (instance, iface_no); - for (i = 0; i < instance->interface_count; i++) { - sprintf (instance->status[i], "ring %d active with no faults", i); + if (iface_no == instance->interface_count) { + for (i = 0; i < instance->interface_count; i++) { + sprintf (instance->status[i], "ring %d active with no faults", i); + } + } else { + sprintf (instance->status[iface_no], "ring %d active with no faults", iface_no); } return (res); diff --git a/exec/totemrrp.h b/exec/totemrrp.h index 7cec356..1ce119e 100644 --- a/exec/totemrrp.h +++ b/exec/totemrrp.h @@ -129,7 +129,8 @@ extern int totemrrp_crypto_set ( unsigned int type); extern int totemrrp_ring_reenable ( - void *rrp_context); + void *rrp_context, + unsigned int iface_no); extern int totemrrp_mcast_recv_empty ( void *rrp_context); diff --git a/exec/totemsrp.c b/exec/totemsrp.c index ddcde32..3dcc05e 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -864,6 +864,9 @@ int totemsrp_initialize ( "RRP threshold (%d problem count)\n", totem_config->rrp_problem_count_threshold); log_printf (instance->totemsrp_log_level_debug, + "RRP automatic recovery check timeout (%d ms)\n", + totem_config->rrp_autorecovery_check_timeout); + log_printf (instance->totemsrp_log_level_debug, "RRP mode set to %s.\n", instance->totem_config->rrp_mode); log_printf (instance->totemsrp_log_level_debug, @@ -1054,7 +1057,8 @@ int totemsrp_ring_reenable ( { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; - totemrrp_ring_reenable (instance->totemrrp_context); + totemrrp_ring_reenable (instance->totemrrp_context, + instance->totem_config->interface_count); return (0); } diff --git a/include/corosync/totem/totem.h 
b/include/corosync/totem/totem.h index eb1a908..090a0db 100644 --- a/include/corosync/totem/totem.h +++ b/include/corosync/totem/totem.h @@ -142,6 +142,8 @@ struct totem_config { unsigned int rrp_problem_count_threshold; + unsigned int rrp_autorecovery_check_timeout; + char rrp_mode[TOTEM_RRP_MODE_BYTES]; struct totem_logging_configuration totem_logging_configuration; diff --git a/man/corosync.conf.5 b/man/corosync.conf.5 index 2ebf802..d092064 100644 --- a/man/corosync.conf.5 +++ b/man/corosync.conf.5 @@ -483,6 +483,13 @@ override this value without guidance from the corosync community. The default is 47 milliseconds. +.TP +rrp_autorecovery_check_timeout +This specifies the time in milliseconds to check if the failed ring can be +auto-recovered. + +The default is 1000 milliseconds. + .PP Within the .B logging _______________________________________________ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais