- immnd sends re-introduce with refresh=3 and the ex-immd (active) node id. - immd sets very high priority for the re-introduce msg of the local immnd and chooses it as coord if re-introduce refresh=3 comes from the local immnd. - immd replies to re-intro with a reboot order if the node's ex-immd is not the same as the ex-immd of the selected coord. - immd uses new INTRO_RSP_2 to checkpoint ex-immd to standby. - immnd uses MDS_RED_SUBSCRIBE on immd to know the active/standby immd, which helps detect headless when multiple partitioned clusters rejoin. - immnd discards FEVS from unknown immd or during re-introduce to avoid an immnd OUT OF ORDER restart and loss of ex-immd info. - Update README.SC_ABSENCE for this new feature. - Allow this new feature to be enabled/disabled by configuration. - immd standby will reboot if it sees two active immds, to avoid syncing with the wrong active. --- scripts/opensaf_reboot | 1 + src/imm/README.SC_ABSENCE | 21 ++++++++++ src/imm/common/immsv_evt.c | 17 +++++++- src/imm/common/immsv_evt.h | 4 ++ src/imm/immd/immd.conf | 7 ++++ src/imm/immd/immd_cb.h | 3 ++ src/imm/immd/immd_evt.c | 86 ++++++++++++++++++++++++++++++++------ src/imm/immd/immd_main.c | 9 ++++ src/imm/immd/immd_mbcsv.c | 24 +++++++++-- src/imm/immd/immd_mds.c | 17 +++++--- src/imm/immd/immd_proc.c | 15 ++++--- src/imm/immd/immd_red.h | 1 + src/imm/immd/immd_sbevt.c | 9 +++- src/imm/immnd/immnd_cb.h | 3 ++ src/imm/immnd/immnd_evt.c | 84 +++++++++++++++++++++++++++++-------- src/imm/immnd/immnd_main.c | 2 + src/imm/immnd/immnd_mds.c | 35 ++++++++++++---- src/imm/immnd/immnd_proc.c | 19 ++++----- 18 files changed, 290 insertions(+), 67 deletions(-)
diff --git a/scripts/opensaf_reboot b/scripts/opensaf_reboot index bcbc689f0..bb3cee5a1 100644 --- a/scripts/opensaf_reboot +++ b/scripts/opensaf_reboot @@ -143,6 +143,7 @@ unset tipc # argument 3 is set to 1, "safe reboot" request. if [ "$#" = 0 ]; then $icmd pkill -STOP osafamfd + $icmd pkill -STOP osafimmd quick_local_node_reboot elif [ "$safe_reboot" = 1 ]; then opensaf_safe_reboot diff --git a/src/imm/README.SC_ABSENCE b/src/imm/README.SC_ABSENCE index 9cae5d519..644cbb546 100644 --- a/src/imm/README.SC_ABSENCE +++ b/src/imm/README.SC_ABSENCE @@ -76,3 +76,24 @@ Support for absent IMMD is incompatible with 2PBE. If both are configured then 2PBE will win and the absence of IMMD feature will be ignored. An error message is printed in this case to the syslog at startup. + +SC ABSENCE and ROAMING SC +========================= +With SC absence enabled in a roaming SC cluster, multiple partitioned clusters +can occur due to a network split. If a PBE database is configured on the local +node, then many diverged IMM databases can occur. If these clusters rejoin into +one cluster, undefined behavior may happen. To avoid this, IMM implements a +mechanism to reboot nodes that used to be on a different partition from the +selected coordinator [ticket #2936]. + +- IMMND sends re-introduce using refresh id 3 with the ex-immd node id. +- When a payload becomes controller, the IMMD will select a coordinator +(prioritizing the local IMMND) and reply with a reboot order to nodes whose +ex-immd node id differs from the ex-immd of the selected coordinator. +- Active IMMD uses the new IMMD_A2S_MSG_INTRO_RSP_2 to checkpoint node info +with ex-immd to the standby IMMD. +- IMMND uses MDS_RED_SUBSCRIBE to know Active/Standby. It discards FEVS from an +unknown IMMD, or while waiting for acceptance of re-introduce, to avoid an IMMND +restart due to OUT OF ORDER. This also detects headless when multiple partitions rejoin. 
+ +To enable this mechanism, please export IMMSV_COORD_SELECT_NODE=1 in immd.conf diff --git a/src/imm/common/immsv_evt.c b/src/imm/common/immsv_evt.c index c93f82a0f..1c43ec719 100644 --- a/src/imm/common/immsv_evt.c +++ b/src/imm/common/immsv_evt.c @@ -3395,7 +3395,7 @@ static uint32_t immsv_evt_enc_toplevel(IMMSV_EVT *i_evt, NCS_UBAID *o_ub) * sublevel */ } - if ((immdevt->info.ctrl_msg.refresh == 2) && + if ((immdevt->info.ctrl_msg.refresh >= 2) && (immdevt->type == IMMD_EVT_ND2D_INTRO)) { /* Intro after IMMD restart. */ @@ -3419,6 +3419,12 @@ static uint32_t immsv_evt_enc_toplevel(IMMSV_EVT *i_evt, NCS_UBAID *o_ub) ncs_encode_32bit( &p8, immdevt->info.ctrl_msg.impl_count); ncs_enc_claim_space(o_ub, 4); + if (immdevt->info.ctrl_msg.refresh == 3) { + IMMSV_RSRV_SPACE_ASSERT(p8, o_ub, 4); + ncs_encode_32bit( + &p8, immdevt->info.ctrl_msg.ex_immd_node_id); + ncs_enc_claim_space(o_ub, 4); + } } break; @@ -5127,7 +5133,7 @@ static uint32_t immsv_evt_dec_toplevel(NCS_UBAID *i_ub, IMMSV_EVT *o_evt) * sublevel */ } - if ((immdevt->info.ctrl_msg.refresh == 2) && + if ((immdevt->info.ctrl_msg.refresh >= 2) && (immdevt->type == IMMD_EVT_ND2D_INTRO)) { /* Intro after IMMD restart. 
*/ @@ -5155,6 +5161,13 @@ static uint32_t immsv_evt_dec_toplevel(NCS_UBAID *i_ub, IMMSV_EVT *o_evt) immdevt->info.ctrl_msg.impl_count = ncs_decode_32bit(&p8); ncs_dec_skip_space(i_ub, 4); + if (immdevt->info.ctrl_msg.refresh == 3) { + IMMSV_FLTN_SPACE_ASSERT(p8, local_data, i_ub, + 4); + immdevt->info.ctrl_msg.ex_immd_node_id = + ncs_decode_32bit(&p8); + ncs_dec_skip_space(i_ub, 4); + } } break; diff --git a/src/imm/common/immsv_evt.h b/src/imm/common/immsv_evt.h index 156220d5d..18aeca447 100644 --- a/src/imm/common/immsv_evt.h +++ b/src/imm/common/immsv_evt.h @@ -324,6 +324,7 @@ typedef struct immsv_mds_info { typedef struct immsv_send_info { MDS_SVC_ID to_svc; /* The service at the destination */ MDS_DEST dest; /* Who to send */ + NODE_ID node_id; /* Node ID of sender */ MDS_SENDTYPES stype; /* Send type */ MDS_SYNC_SND_CTXT ctxt; /* MDS Opaque context */ uint8_t mSynReqCount; @@ -342,6 +343,7 @@ typedef struct immsv_fevs { /* Holds nodeId and connection */ IMMSV_OCTET_STRING msg; uint8_t isObjSync; /* Used by coord to avoid unpacking, saves exec.*/ + NODE_ID ex_immd_node_id; } IMMSV_FEVS; /**************************************************************************** @@ -451,6 +453,7 @@ typedef struct immsv_d2nd_control { IMMSV_OCTET_STRING xmlFile; IMMSV_OCTET_STRING pbeFile; + NODE_ID ex_immd_node_id; } IMMSV_D2ND_CONTROL; /**************************************************************************** @@ -486,6 +489,7 @@ typedef struct immsv_nd2d_control { SaUint32T admo_id_count; // Max received at IMMND for AdminOwner ID SaUint32T ccb_id_count; // Max received at IMMND for CCB ID SaUint32T impl_count; // Max received at IMMND for Implementer ID + NODE_ID ex_immd_node_id; } IMMSV_ND2D_CONTROL; typedef struct immsv_nd2d_2_pbe { diff --git a/src/imm/immd/immd.conf b/src/imm/immd/immd.conf index 140d458cf..a5b327314 100644 --- a/src/imm/immd/immd.conf +++ b/src/imm/immd/immd.conf @@ -66,6 +66,13 @@ export IMMSV_ENV_HEALTHCHECK_KEY="Default" # The default value 
(when the environment variable is not set) is 3 seconds. #export IMMSV_SC_ABSENCE_VETERAN_MAX_WAIT=3 +# In a roaming SC cluster with headless enabled, multiple partitioned clusters +# can occur if the network splits. When the network rejoins, an IMMND coordinator +# is selected. If IMMD then receives re-introduce from IMMNDs, it orders a reboot +# of any node that used to be on a different partition from the current coordinator. +# This feature is disabled by default; to enable it, uncomment the next line. +#export IMMSV_COORD_SELECT_NODE=1 + # Only log priority LOG_WARNING and higher to the system log file. # All logging will be recorded in a new node local log file $PKGLOGDIR/osaf.log. # Uncomment the next line to enable this service to log to OpenSAF node local log file. diff --git a/src/imm/immd/immd_cb.h b/src/imm/immd/immd_cb.h index 38d6f1e2d..bf8dc0d7f 100644 --- a/src/imm/immd/immd_cb.h +++ b/src/imm/immd/immd_cb.h @@ -61,6 +61,7 @@ typedef struct immd_immnd_info_node { bool syncStarted; bool pbeConfigured; /* Pbe-file-name configured. Pbe may still be disabled. 
*/ bool isUp; /* True if received the MDS UP event */ + NODE_ID ex_immd_node_id; } IMMD_IMMND_INFO_NODE; typedef struct immd_immnd_detached_node { /* IMMD SBY tracking of departed @@ -155,6 +156,8 @@ typedef struct immd_cb_tag { NCS_LOCK veteran_sync_lock; /* Sync up with veteran payload after headless */ NCS_SEL_OBJ veteran_sync_sel; /* Sync up with veteran payload after headless */ + NODE_ID ex_immd_node_id; + bool coord_select_node; } IMMD_CB; uint32_t immd_immnd_info_tree_init(IMMD_CB *cb); diff --git a/src/imm/immd/immd_evt.c b/src/imm/immd/immd_evt.c index a0f632b7e..83831258f 100644 --- a/src/imm/immd/immd_evt.c +++ b/src/imm/immd/immd_evt.c @@ -88,6 +88,22 @@ static uint32_t immd_evt_proc_2pbe_preload(IMMD_CB *cb, IMMD_EVT *evt, static uint32_t immd_evt_proc_impl_delete(IMMD_CB *cb, IMMD_EVT *evt, IMMSV_SEND_INFO *sinfo); +static bool is_on_same_partition_with_coord( + IMMD_CB *cb, + const IMMD_IMMND_INFO_NODE *node_info) { + assert(cb->immnd_coord && "No coordinator existing"); + // Same partition with the current IMMND coord + if ((cb->coord_select_node == false) || + (cb->ex_immd_node_id == node_info->ex_immd_node_id)) + return true; + + LOG_WA("Node %x ex-IMMD=%x != current IMMND coord %x ex-IMMD=%x", + node_info->immnd_key, node_info->ex_immd_node_id, + cb->immnd_coord, cb->ex_immd_node_id); + + return false; +} + /**************************************************************************** * Name : immd_process_evt * @@ -791,7 +807,7 @@ static void immd_kill_node(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node_info) static uint16_t accepted_nodes = 0; static void immd_accept_node(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node_info, - bool doReply, bool knownVeteran) + bool doReply, bool knownVeteran, bool check_ex_immd_node_id) { IMMSV_EVT accept_evt; IMMD_MBCSV_MSG mbcp_msg; @@ -805,6 +821,9 @@ static void immd_accept_node(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node_info, ++accepted_nodes; } + LOG_NO( + "Accept intro from %x with ex-IMMD %x", + node_info->immnd_key, 
node_info->ex_immd_node_id); accept_evt.type = IMMSV_EVT_TYPE_IMMND; accept_evt.info.immnd.type = IMMND_EVT_D2ND_INTRO_RSP; accept_evt.info.immnd.info.ctrl.nodeId = node_info->immnd_key; @@ -851,11 +870,20 @@ static void immd_accept_node(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node_info, cb->immnd_coord = node_info->immnd_key; node_info->isCoord = true; } else if (cb->mScAbsenceAllowed && doReply) { - LOG_NO( - "Postponing acceptance of SC IMMND until %u nodes introduced.", - mds_attached_nodes); - doReply = false; - --accepted_nodes; + if ((check_ex_immd_node_id) && + (cb->node_id == node_info->immnd_key)) { + LOG_NO( + "IMMND re-introduce to IMMD on same this node. " + "Roaming SC => designating IMMND as coordinator"); + cb->immnd_coord = node_info->immnd_key; + node_info->isCoord = true; + } else { + LOG_NO( + "Postponing acceptance of SC IMMND until %u nodes introduced.", + mds_attached_nodes); + doReply = false; + --accepted_nodes; + } } else { LOG_NO( "PROBLEM CASE (?) ScAbsenceAllowed: %u; accepted_nodes:%u; mds_attached_nodess:%u", @@ -892,12 +920,20 @@ static void immd_accept_node(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node_info, accept_evt.info.immnd.info.ctrl.isCoord = true; accept_evt.info.immnd.info.ctrl.syncStarted = node_info->syncStarted; + LOG_NO( + "IMMND coord at %x with ex-IMMD %x", + node_info->immnd_key, node_info->ex_immd_node_id); + cb->ex_immd_node_id = node_info->ex_immd_node_id; } mbcp_msg.type = IMMD_A2S_MSG_INTRO_RSP; /* Mbcp intro to SBY. */ mbcp_msg.info.ctrl = accept_evt.info.immnd.info.ctrl; + if (check_ex_immd_node_id) { + mbcp_msg.type = IMMD_A2S_MSG_INTRO_RSP_2; + mbcp_msg.info.ctrl.ex_immd_node_id = node_info->ex_immd_node_id; + } if (cb->mPbeFile && !(cb->mFsParamMbcp) && - cb->immd_remote_up) { /* Send fs params to SBY. */ + cb->immd_remote_up && cb->mDir) { /* Send fs params to SBY. 
*/ cb->mFsParamMbcp = true; fsParamMbcp = true; mbcp_msg.info.ctrl.dir.size = strlen(cb->mDir) + 1; @@ -937,6 +973,13 @@ static void immd_accept_node(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node_info, accept_evt.info.immnd.info.ctrl .canBeCoord = 4; /* Allow all nodes including payloads to be coord */ + + if (check_ex_immd_node_id && + !is_on_same_partition_with_coord(cb, node_info)) { + LOG_WA("Going to reboot node 0x%x", node_info->immnd_key); + accept_evt.info.immnd.info.ctrl.canBeCoord = 255; + } + accept_evt.info.immnd.info.ctrl.ndExecPid = cb->mScAbsenceAllowed; /* ExecPid not realy used by IMMND as receiver on reply @@ -1061,6 +1104,7 @@ static IMMD_IMMND_INFO_NODE *immd_add_immnd_node(IMMD_CB *cb, MDS_DEST dest) return NULL; } + node_info->ex_immd_node_id = 0; node_info->isUp = true; if (add_flag) { @@ -1683,6 +1727,14 @@ static uint32_t immd_evt_proc_immnd_intro(IMMD_CB *cb, IMMD_EVT *evt, } } + // Check refresh value to avoid updating on legacy IMMND. + if (evt->info.ctrl_msg.refresh == 3) { + node_info->ex_immd_node_id = evt->info.ctrl_msg.ex_immd_node_id; + LOG_NO( + "IMMND at %x re-intro with ex-IMMD %x", + node_info->immnd_key, node_info->ex_immd_node_id); + } + oldPid = node_info->immnd_execPid; oldEpoch = node_info->epoch; newPid = evt->info.ctrl_msg.ndExecPid; @@ -1766,7 +1818,7 @@ static uint32_t immd_evt_proc_immnd_intro(IMMD_CB *cb, IMMD_EVT *evt, } } - if (evt->info.ctrl_msg.refresh == 2) { + if (evt->info.ctrl_msg.refresh >= 2) { /* Refresh from up and running IMMND perspective but not from restarted IMMDs perspective. IMMNDs update IMMDs with current global counters. There is a potential @@ -1882,7 +1934,7 @@ static uint32_t immd_evt_proc_immnd_intro(IMMD_CB *cb, IMMD_EVT *evt, } else { /* Regular old refresh, basically just statistics and * tracing. 
*/ - immd_accept_node(cb, node_info, false, false); + immd_accept_node(cb, node_info, false, false, false); goto done; } } @@ -1913,7 +1965,7 @@ update_node_type: node_info->immnd_key); } - if (evt->info.ctrl_msg.refresh == 2) { + if (evt->info.ctrl_msg.refresh >= 2) { goto accept_node; } @@ -2175,7 +2227,8 @@ update_node_type: accept_node: - immd_accept_node(cb, node_info, true, veteranImmndNode); + immd_accept_node(cb, node_info, true, veteranImmndNode, + evt->info.ctrl_msg.refresh == 3); done: @@ -3139,7 +3192,7 @@ static uint32_t immd_evt_proc_mds_evt(IMMD_CB *cb, IMMD_EVT *evt) /* get the peer mds_red_up */ /* from the Node ID get the mds_dest of remote IMMND */ TRACE_5("Process MDS EVT NCSMDS_RED_UP, my PID:%u", getpid()); - if (cb->node_id != mds_info->node_id) { + if ((cb->node_id != mds_info->node_id) && (!cb->immd_remote_up)) { MDS_DEST tmpDest = 0LL; NCS_NODE_ID immnd_remote_id = 0; TRACE_5("Remote IMMD is UP."); @@ -3172,7 +3225,7 @@ static uint32_t immd_evt_proc_mds_evt(IMMD_CB *cb, IMMD_EVT *evt) if (!cb->is_rem_immnd_up) { immd_accept_node( cb, node_info, true, - false); /* <==== Can + false, false); /* <==== Can not be sc-absence veteran @@ -3217,6 +3270,13 @@ static uint32_t immd_evt_proc_mds_evt(IMMD_CB *cb, IMMD_EVT *evt) if (cb->mIs2Pbe && cb->mds_role == V_DEST_RL_STANDBY) { cb->m2PbeCanLoad = true; } + } else if ((mds_info->node_id != cb->immd_self_id) && + (mds_info->node_id != cb->immd_remote_id) && + (mds_info->role == V_DEST_RL_ACTIVE) && + (cb->ha_state == SA_AMF_HA_STANDBY)) { + LOG_ER("Standby peer see two peers: %x and %x", + cb->immd_remote_id, mds_info->node_id); + opensaf_reboot(0, NULL, "Standby peer see two peers"); } break; diff --git a/src/imm/immd/immd_main.c b/src/imm/immd/immd_main.c index b6e36b47a..28443e71d 100644 --- a/src/imm/immd/immd_main.c +++ b/src/imm/immd/immd_main.c @@ -279,6 +279,7 @@ int main(int argc, char *argv[]) const char *absentScStr = getenv("IMMSV_SC_ABSENCE_ALLOWED"); const char *veteranWaitStr = 
getenv("IMMSV_SC_ABSENCE_VETERAN_MAX_WAIT"); + const char *coordSelectNodePtr = getenv("IMMSV_COORD_SELECT_NODE"); int32_t timeout = (-1); int32_t total_wait = (-1); int64_t start_time = 0LL; @@ -288,6 +289,14 @@ int main(int argc, char *argv[]) daemonize(argc, argv); + immd_cb->coord_select_node = false; + if (coordSelectNodePtr) { + if (atoi(coordSelectNodePtr) == 1) { + LOG_NO("IMMSV_COORD_SELECT_NODE is enable"); + immd_cb->coord_select_node = true; + } + } + if (absentScStr) { scAbsenceAllowed = atoi(absentScStr); if (!scAbsenceAllowed) { diff --git a/src/imm/immd/immd_mbcsv.c b/src/imm/immd/immd_mbcsv.c index 85581f467..0bdac9283 100644 --- a/src/imm/immd/immd_mbcsv.c +++ b/src/imm/immd/immd_mbcsv.c @@ -526,6 +526,7 @@ static uint32_t mbcsv_enc_async_update(IMMD_CB *cb, NCS_MBCSV_CB_ARG *arg) break; case IMMD_A2S_MSG_INTRO_RSP: + case IMMD_A2S_MSG_INTRO_RSP_2: case IMMD_A2S_MSG_SYNC_START: case IMMD_A2S_MSG_SYNC_ABORT: case IMMD_A2S_MSG_DUMP_OK: @@ -580,9 +581,9 @@ static uint32_t mbcsv_enc_async_update(IMMD_CB *cb, NCS_MBCSV_CB_ARG *arg) ncs_enc_claim_space(&arg->info.encode.io_uba, sizeof(uint8_t)); ncs_encode_8bit(&uns8_ptr, immd_msg->info.ctrl.pbeEnabled); - if ((arg->info.encode.io_reo_type == IMMD_A2S_MSG_INTRO_RSP) && - (immd_msg->info.ctrl.pbeEnabled >= - 3)) { /* extended intro */ + if (((arg->info.encode.io_reo_type == IMMD_A2S_MSG_INTRO_RSP) || + (arg->info.encode.io_reo_type == IMMD_A2S_MSG_INTRO_RSP_2)) && + (immd_msg->info.ctrl.pbeEnabled >= 3)) { /* extended intro */ TRACE_5("Encoding Fs params for mbcp to standy immd"); uns32_ptr = ncs_enc_reserve_space( @@ -621,6 +622,13 @@ static uint32_t mbcsv_enc_async_update(IMMD_CB *cb, NCS_MBCSV_CB_ARG *arg) immsv_evt_enc_inline_string(&arg->info.encode.io_uba, os); } + if (arg->info.encode.io_reo_type == IMMD_A2S_MSG_INTRO_RSP_2) { + uns32_ptr = ncs_enc_reserve_space(&arg->info.encode.io_uba, + sizeof(uint32_t)); + osafassert(uns32_ptr); + ncs_enc_claim_space(&arg->info.encode.io_uba, 
sizeof(uint32_t)); + ncs_encode_32bit(&uns32_ptr, immd_msg->info.ctrl.ex_immd_node_id); + } break; case IMMD_A2S_MSG_RESET: @@ -1044,6 +1052,7 @@ static uint32_t mbcsv_dec_async_update(IMMD_CB *cb, NCS_MBCSV_CB_ARG *arg) break; case IMMD_A2S_MSG_INTRO_RSP: + case IMMD_A2S_MSG_INTRO_RSP_2: case IMMD_A2S_MSG_SYNC_START: case IMMD_A2S_MSG_SYNC_ABORT: case IMMD_A2S_MSG_DUMP_OK: @@ -1087,7 +1096,8 @@ static uint32_t mbcsv_dec_async_update(IMMD_CB *cb, NCS_MBCSV_CB_ARG *arg) immd_msg->info.ctrl.pbeEnabled = ncs_decode_8bit(&ptr); ncs_dec_skip_space(&arg->info.decode.i_uba, sizeof(uint8_t)); - if ((evt_type == IMMD_A2S_MSG_INTRO_RSP) && + if (((evt_type == IMMD_A2S_MSG_INTRO_RSP) || + (evt_type == IMMD_A2S_MSG_INTRO_RSP_2)) && (immd_msg->info.ctrl.pbeEnabled >= 3)) { TRACE("Decoding Fs params for mbcp to standy immd"); @@ -1118,6 +1128,12 @@ static uint32_t mbcsv_dec_async_update(IMMD_CB *cb, NCS_MBCSV_CB_ARG *arg) immsv_evt_dec_inline_string(&arg->info.decode.i_uba, os); } + if (evt_type == IMMD_A2S_MSG_INTRO_RSP_2) { + ptr = ncs_dec_flatten_space(&arg->info.decode.i_uba, data, + sizeof(uint32_t)); + immd_msg->info.ctrl.ex_immd_node_id = ncs_decode_32bit(&ptr); + ncs_dec_skip_space(&arg->info.decode.i_uba, sizeof(uint32_t)); + } rc = immd_process_node_accept(cb, &immd_msg->info.ctrl); if (rc != NCSCC_RC_SUCCESS) { diff --git a/src/imm/immd/immd_mds.c b/src/imm/immd/immd_mds.c index fca6341d8..6cdf5b2e7 100644 --- a/src/imm/immd/immd_mds.c +++ b/src/imm/immd/immd_mds.c @@ -503,16 +503,20 @@ static uint32_t immd_mds_rcv(IMMD_CB *cb, MDS_CALLBACK_RECEIVE_INFO *rcv_info) * other IMMNDs. 
*/ if (cb->mScAbsenceAllowed && pEvt->info.immd.type == IMMD_EVT_ND2D_INTRO && - pEvt->info.immd.info.ctrl_msg.refresh == 2) { + pEvt->info.immd.info.ctrl_msg.refresh >= 2) { prio = NCS_IPC_PRIORITY_HIGH; - m_NCS_LOCK(&immd_cb->veteran_sync_lock, NCS_LOCK_WRITE); - if (cb->veteran_sync_sel.raise_obj != - -1) { /* Check if the sel_obj is not destroyed */ - m_NCS_SEL_OBJ_IND(&cb->veteran_sync_sel); + if (rcv_info->i_node_id == cb->node_id) { + /* Prioritize re-intro from local IMMND */ + prio = NCS_IPC_PRIORITY_VERY_HIGH; + m_NCS_LOCK(&immd_cb->veteran_sync_lock, NCS_LOCK_WRITE); + if (cb->veteran_sync_sel.raise_obj != + -1) { /* Check if the sel_obj is not destroyed */ + m_NCS_SEL_OBJ_IND(&cb->veteran_sync_sel); + } + m_NCS_UNLOCK(&immd_cb->veteran_sync_lock, NCS_LOCK_WRITE); } - m_NCS_UNLOCK(&immd_cb->veteran_sync_lock, NCS_LOCK_WRITE); } /* Put it in IMMD's Event Queue */ @@ -559,6 +563,7 @@ static uint32_t immd_mds_svc_evt(IMMD_CB *cb, evt->info.immd.info.mds_info.dest = svc_evt->i_dest; evt->info.immd.info.mds_info.svc_id = svc_evt->i_svc_id; evt->info.immd.info.mds_info.node_id = svc_evt->i_node_id; + evt->info.immd.info.mds_info.role = svc_evt->i_role; /* Put it in IMMD's Event Queue */ uint32_t rc = m_NCS_IPC_SEND(&cb->mbx, (NCSCONTEXT)evt, diff --git a/src/imm/immd/immd_proc.c b/src/imm/immd/immd_proc.c index 69e23f2d3..b1116b07c 100644 --- a/src/imm/immd/immd_proc.c +++ b/src/imm/immd/immd_proc.c @@ -406,15 +406,19 @@ bool immd_proc_elect_coord(IMMD_CB *cb, bool new_active) if (self_re_elect) { /* Ensure we re-elected ourselves. 
*/ osafassert(immnd_info_node->immnd_key == cb->node_id); - LOG_NO("Coord re-elected, resides at %x", - immnd_info_node->immnd_key); + LOG_NO( + "Coord re-elected, resides at %x", + immnd_info_node->immnd_key); } else { - LOG_NO("New coord elected, resides at %x", - immnd_info_node->immnd_key); + LOG_NO( + "New coord elected, resides at %x with ex-IMMD %x", + immnd_info_node->immnd_key, + immnd_info_node->ex_immd_node_id); } cb->immnd_coord = immnd_info_node->immnd_key; + cb->ex_immd_node_id = immnd_info_node->ex_immd_node_id; if (!cb->is_rem_immnd_up) { if (cb->immd_remote_id && m_IMMND_IS_ON_SCXB( @@ -452,8 +456,9 @@ bool immd_proc_elect_coord(IMMD_CB *cb, bool new_active) send_evt.info.immnd.info.ctrl.pbeEnabled = (cb->mRim == SA_IMM_KEEP_REPOSITORY); - mbcp_msg.type = IMMD_A2S_MSG_INTRO_RSP; + mbcp_msg.type = IMMD_A2S_MSG_INTRO_RSP_2; mbcp_msg.info.ctrl = send_evt.info.immnd.info.ctrl; + mbcp_msg.info.ctrl.ex_immd_node_id = immnd_info_node->ex_immd_node_id; /*Checkpoint the new coordinator message to standby director. 
Syncronous call=>wait for ack */ if (immd_mbcsv_sync_update(cb, &mbcp_msg) != NCSCC_RC_SUCCESS) { diff --git a/src/imm/immd/immd_red.h b/src/imm/immd/immd_red.h index e8cf8e249..9224facb1 100644 --- a/src/imm/immd/immd_red.h +++ b/src/imm/immd/immd_red.h @@ -29,6 +29,7 @@ typedef enum immd_mbcsv_msg_type { IMMD_A2S_MSG_DUMP_OK, IMMD_A2S_MSG_RESET, IMMD_A2S_MSG_SYNC_ABORT, + IMMD_A2S_MSG_INTRO_RSP_2, IMMD_A2S_MSG_MAX_EVT } IMMD_MBCSV_MSG_TYPE; diff --git a/src/imm/immd/immd_sbevt.c b/src/imm/immd/immd_sbevt.c index 66b2e61f0..fb7e42a68 100644 --- a/src/imm/immd/immd_sbevt.c +++ b/src/imm/immd/immd_sbevt.c @@ -177,6 +177,10 @@ uint32_t immd_process_node_accept(IMMD_CB *cb, IMMSV_D2ND_CONTROL *ctrl) } if (immnd_info_node) { + LOG_NO( + "SBY: Accept intro from %x with ex-IMMD %x", + ctrl->nodeId, ctrl->ex_immd_node_id); + immnd_info_node->ex_immd_node_id = ctrl->ex_immd_node_id; if (immnd_info_node->epoch < ctrl->nodeEpoch) { LOG_NO("SBY: New Epoch for IMMND process at node %x " "old epoch: %u new epoch:%u", @@ -204,6 +208,7 @@ uint32_t immd_process_node_accept(IMMD_CB *cb, IMMSV_D2ND_CONTROL *ctrl) immnd_info_node->isCoord = ctrl->isCoord; if (ctrl->isCoord) { + cb->ex_immd_node_id = immnd_info_node->ex_immd_node_id; SaImmRepositoryInitModeT oldRim = cb->mRim; cb->immnd_coord = immnd_info_node->immnd_key; cb->payload_coord_dest = @@ -211,7 +216,9 @@ uint32_t immd_process_node_accept(IMMD_CB *cb, IMMSV_D2ND_CONTROL *ctrl) ? 0LL : immnd_info_node->immnd_dest; cb->m2PbeCanLoad = true; - LOG_NO("IMMND coord at %x", immnd_info_node->immnd_key); + LOG_NO( + "IMMND coord at %x with ex-IMMD %x", + immnd_info_node->immnd_key, cb->ex_immd_node_id); immnd_info_node->syncStarted = ctrl->syncStarted; cb->mRim = (ctrl->pbeEnabled == 4) ? 
SA_IMM_KEEP_REPOSITORY diff --git a/src/imm/immnd/immnd_cb.h b/src/imm/immnd/immnd_cb.h index bca99f98b..8feb3f394 100644 --- a/src/imm/immnd/immnd_cb.h +++ b/src/imm/immnd/immnd_cb.h @@ -151,6 +151,9 @@ typedef struct immnd_cb_tag { /* Information about the IMMD */ MDS_DEST immd_mdest_id; + NODE_ID immd_node_id; + NODE_ID other_immd_id; + NODE_ID ex_immd_node_id; bool is_immd_up; /* IMMND data */ diff --git a/src/imm/immnd/immnd_evt.c b/src/imm/immnd/immnd_evt.c index 5b82de01e..ff6cc353e 100644 --- a/src/imm/immnd/immnd_evt.c +++ b/src/imm/immnd/immnd_evt.c @@ -600,6 +600,15 @@ void immnd_process_evt(void) return; } + if ((evt->sinfo.to_svc == NCSMDS_SVC_ID_IMMD) && + (evt->sinfo.node_id != cb->immd_node_id) && + (evt->sinfo.node_id != cb->other_immd_id)) { + LOG_WA("DISCARD message from IMMD %x as ACT:%x SBY:%x", + evt->sinfo.node_id, cb->immd_node_id, cb->other_immd_id); + immnd_evt_destroy(evt, true, __LINE__); + return; + } + if ((evt->info.immnd.type != IMMND_EVT_D2ND_GLOB_FEVS_REQ) && (evt->info.immnd.type != IMMND_EVT_D2ND_GLOB_FEVS_REQ_2)) immsv_msg_trace_rec(evt->sinfo.dest, evt); @@ -10490,14 +10499,18 @@ static uint32_t immnd_evt_proc_intro_rsp(IMMND_CB *cb, IMMND_EVT *evt, if (evt->info.ctrl.nodeId == cb->node_id) { /*This node was introduced to the IMM cluster */ uint8_t oldCanBeCoord = cb->mCanBeCoord; - cb->mIntroduced = 1; - if (evt->info.ctrl.canBeCoord == 3) { + if (evt->info.ctrl.canBeCoord == 255) { + LOG_NO("Used to be on another partition. Rebooting..."); + opensaf_quick_reboot( + "Used to be on another partition. 
Rebooting..."); + return NCSCC_RC_SUCCESS; + } else if (evt->info.ctrl.canBeCoord == 3) { cb->m2Pbe = 1; evt->info.ctrl.canBeCoord = 1; LOG_IN("2PBE SYNC CASE CAUGHT oldCanBeCoord:%u", oldCanBeCoord); } - + cb->mIntroduced = 1; cb->mCanBeCoord = evt->info.ctrl.canBeCoord; if ((cb->mCanBeCoord == 2) && (cb->m2Pbe < 2) && immnd_cb->isNodeTypeController) { @@ -10739,7 +10752,7 @@ void dequeue_outgoing(IMMND_CB *cb) *****************************************************************************/ static uint32_t immnd_evt_proc_fevs_rcv(IMMND_CB *cb, IMMND_EVT *evt, IMMSV_SEND_INFO *sinfo) -{ /*sinfo not used */ +{ osafassert(evt); SaUint64T msgNo = evt->info.fevsReq.sender_count; /*Global MsgNo */ SaImmHandleT clnt_hdl = evt->info.fevsReq.client_hdl; @@ -10750,9 +10763,17 @@ static uint32_t immnd_evt_proc_fevs_rcv(IMMND_CB *cb, IMMND_EVT *evt, : false; TRACE_ENTER(); + if (cb->mIntroduced == 2) { + LOG_WA("DISCARD FEVS message:%llu from %x", msgNo, sinfo->node_id); + dequeue_outgoing(cb); + return NCSCC_RC_FAILURE; + } + if (cb->highestProcessed >= msgNo) { /*We have already received this message, discard it. 
*/ - LOG_WA("DISCARD DUPLICATE FEVS message:%llu", msgNo); + LOG_WA( + "DISCARD DUPLICATE FEVS message:%llu from %x", + msgNo, sinfo->node_id); dequeue_outgoing(cb); return NCSCC_RC_FAILURE; /*TODO: ensure evt is discarded by invoker */ @@ -10795,8 +10816,9 @@ static uint32_t immnd_evt_proc_fevs_rcv(IMMND_CB *cb, IMMND_EVT *evt, * message */ if (cb->mAccepted) { LOG_ER( - "MESSAGE:%llu OUT OF ORDER my highest processed:%llu - exiting", - msgNo, cb->highestProcessed); + "MESSAGE:%llu from %x OUT OF ORDER " + "my highest processed:%llu - exiting", + msgNo, sinfo->node_id, cb->highestProcessed); SyslogRecentFevs(); immnd_ackToNid(NCSCC_RC_FAILURE); exit(1); @@ -10843,6 +10865,8 @@ static uint32_t immnd_evt_proc_fevs_rcv(IMMND_CB *cb, IMMND_EVT *evt, } done: + if (sinfo->node_id) + cb->ex_immd_node_id = sinfo->node_id; cb->highestProcessed++; dequeue_outgoing(cb); TRACE_LEAVE(); @@ -12169,6 +12193,7 @@ static uint32_t immnd_evt_proc_mds_evt(IMMND_CB *cb, IMMND_EVT *evt) { /*TRACE_ENTER(); */ uint32_t rc = NCSCC_RC_SUCCESS; + bool is_headless = false; if ((evt->info.mds_info.change == NCSMDS_DOWN) && (evt->info.mds_info.svc_id == NCSMDS_SVC_ID_IMMA_OM || @@ -12176,8 +12201,35 @@ static uint32_t immnd_evt_proc_mds_evt(IMMND_CB *cb, IMMND_EVT *evt) TRACE_2("IMMA DOWN EVENT"); immnd_proc_imma_down(cb, evt->info.mds_info.dest, evt->info.mds_info.svc_id); - } else if ((evt->info.mds_info.change == NCSMDS_DOWN) && - evt->info.mds_info.svc_id == NCSMDS_SVC_ID_IMMD) { + } + + /* In multi partitioned clusters rejoin, IMMND may not realize + * headless due to see IMMDs from different partitions */ + if ((evt->info.mds_info.change == NCSMDS_RED_UP) && + (evt->info.mds_info.svc_id == NCSMDS_SVC_ID_IMMD) && + (evt->info.mds_info.node_id != cb->immd_node_id) && + (evt->info.mds_info.role == V_DEST_RL_STANDBY) && + (cb->other_immd_id == 0)) { + cb->other_immd_id = evt->info.mds_info.node_id; + TRACE_2("IMMD RED_UP EVENT %x role=%d ==> ACT:%x SBY:%x", + evt->info.mds_info.node_id, 
evt->info.mds_info.role, + cb->immd_node_id, cb->other_immd_id); + } else if ((evt->info.mds_info.change == NCSMDS_RED_DOWN) && + (evt->info.mds_info.svc_id == NCSMDS_SVC_ID_IMMD)) { + if (cb->immd_node_id == evt->info.mds_info.node_id) + cb->immd_node_id = 0; + if (cb->other_immd_id == evt->info.mds_info.node_id) + cb->other_immd_id = 0; + TRACE_2("IMMD RED_DOWN EVENT %x role=%d ==> ACT:%x SBY:%x", + evt->info.mds_info.node_id, evt->info.mds_info.role, + cb->immd_node_id, cb->other_immd_id); + if ((cb->immd_node_id == 0) && (cb->other_immd_id == 0)) { + LOG_WA("Both Active & Standby DOWN, going to headless"); + is_headless = true; + } + } + + if (is_headless) { /* Cluster is going down. */ if (cb->mScAbsenceAllowed == 0) { /* Regular (non Hydra) exit on IMMD DOWN. */ @@ -12188,8 +12240,10 @@ static uint32_t immnd_evt_proc_mds_evt(IMMND_CB *cb, IMMND_EVT *evt) exit(1); } else { /* SC ABSENCE ALLOWED */ cb->mIntroduced = 2; - LOG_WA("SC Absence IS allowed:%u IMMD service is DOWN", - cb->mScAbsenceAllowed); + LOG_WA( + "SC Absence IS allowed:%u IMMD service is DOWN " + "ex_immd_node_id=%x", + cb->mScAbsenceAllowed, cb->ex_immd_node_id); if (cb->mState < IMM_SERVER_SYNC_SERVER) { immnd_ackToNid(NCSCC_RC_FAILURE); exit(1); @@ -12304,7 +12358,8 @@ static uint32_t immnd_evt_proc_mds_evt(IMMND_CB *cb, IMMND_EVT *evt) } else if ((evt->info.mds_info.change == NCSMDS_UP) && (evt->info.mds_info.svc_id == NCSMDS_SVC_ID_IMMD)) { LOG_NO( - "IMMD service is UP ... ScAbsenseAllowed?:%u introduced?:%u", + "IMMD(%x) service is UP ... 
ScAbsenseAllowed?:%u introduced?:%u", + evt->info.mds_info.node_id, cb->mScAbsenceAllowed, cb->mIntroduced); if ((cb->mIntroduced == 2) && (immnd_introduceMe(cb) != NCSCC_RC_SUCCESS)) { @@ -12315,11 +12370,6 @@ static uint32_t immnd_evt_proc_mds_evt(IMMND_CB *cb, IMMND_EVT *evt) (evt->info.mds_info.svc_id == NCSMDS_SVC_ID_IMMA_OI || evt->info.mds_info.svc_id == NCSMDS_SVC_ID_IMMA_OM)) { TRACE_2("IMMA UP EVENT"); - } else if ((evt->info.mds_info.change == NCSMDS_RED_UP) && - (evt->info.mds_info.role == V_DEST_RL_ACTIVE) && - evt->info.mds_info.svc_id == NCSMDS_SVC_ID_IMMD) { - TRACE_2("IMMD new activeEVENT"); - /*immnd_evt_immd_new_active(cb); */ } else if ((evt->info.mds_info.change == NCSMDS_CHG_ROLE) && (evt->info.mds_info.role == V_DEST_RL_ACTIVE) && (evt->info.mds_info.svc_id == NCSMDS_SVC_ID_IMMD)) { diff --git a/src/imm/immnd/immnd_main.c b/src/imm/immnd/immnd_main.c index 62c7b2478..1f3af6558 100644 --- a/src/imm/immnd/immnd_main.c +++ b/src/imm/immnd/immnd_main.c @@ -298,6 +298,8 @@ static uint32_t immnd_initialize(char *progname) immnd_cb->mFile = getenv("IMMSV_LOAD_FILE"); immnd_cb->clm_hdl = 0; immnd_cb->clmSelectionObject = -1; + immnd_cb->immd_node_id = 0; + immnd_cb->other_immd_id = 0; populate_reserved_class_names(immnd_cb); diff --git a/src/imm/immnd/immnd_mds.c b/src/imm/immnd/immnd_mds.c index 89cc56a29..4515787d0 100644 --- a/src/imm/immnd/immnd_mds.c +++ b/src/imm/immnd/immnd_mds.c @@ -158,7 +158,7 @@ uint32_t immnd_mds_register(IMMND_CB *cb) /* STEP 3: Subscribe to IMMD up/down events */ svc_info.i_op = - MDS_SUBSCRIBE; /* Normal mode subscription => vdest is used. */ + MDS_RED_SUBSCRIBE; /* Redundant mode subscription => vdest is used. 
*/ svc_info.info.svc_subscribe.i_num_svcs = 1; svc_info.info.svc_subscribe.i_scope = NCSMDS_SCOPE_NONE; svc_info.info.svc_subscribe.i_svc_ids = svc_id; @@ -541,6 +541,7 @@ static uint32_t immnd_mds_rcv(IMMND_CB *cb, MDS_CALLBACK_RECEIVE_INFO *rcv_info) pEvt->sinfo.ctxt = rcv_info->i_msg_ctxt; pEvt->sinfo.dest = rcv_info->i_fr_dest; + pEvt->sinfo.node_id = rcv_info->i_node_id; pEvt->sinfo.to_svc = rcv_info->i_fr_svc_id; pEvt->sinfo.pid = rcv_info->pid; pEvt->sinfo.uid = rcv_info->uid; @@ -594,14 +595,18 @@ static uint32_t immnd_mds_svc_evt(IMMND_CB *cb, TRACE("IMMD SERVICE DOWN => CLUSTER GOING DOWN"); cb->fevs_replies_pending = 0; cb->is_immd_up = false; + cb->immd_node_id = 0; break; case NCSMDS_UP: TRACE( - "NCSMDS_UP for IMMD. cb->is_immd_up = true; (v)dest:%llu", - (long long unsigned int)svc_evt->i_dest); + "NCSMDS_UP for IMMD. cb->is_immd_up = true; (v)dest:%llu" + " svc_evt->i_node_id=%x", + (long long unsigned int)svc_evt->i_dest, + svc_evt->i_node_id); cb->is_immd_up = true; cb->immd_mdest_id = svc_evt->i_dest; + cb->immd_node_id = svc_evt->i_node_id; break; case NCSMDS_NO_ACTIVE: @@ -621,20 +626,33 @@ static uint32_t immnd_mds_svc_evt(IMMND_CB *cb, break; case NCSMDS_NEW_ACTIVE: - TRACE("NCSMDS_NEW_ACTIVE IMMD"); - cb->immd_mdest_id = svc_evt->i_dest; + TRACE("NCSMDS_NEW_ACTIVE IMMD %x", svc_evt->i_node_id); + /* In multi partitioned clusters rejoin, IMMND can get + * NEW_ACTIVE from current and other partitions, + * then get FEVS from it cause restart OUT OF ORDER */ + if (cb->other_immd_id == svc_evt->i_node_id) { + cb->other_immd_id = cb->immd_node_id; + cb->immd_mdest_id = svc_evt->i_dest; + cb->immd_node_id = svc_evt->i_node_id; + } break; case NCSMDS_RED_UP: - LOG_ER("NCSMDS_RED_UP: SHOULD NOT HAPPEN"); + TRACE("NCSMDS_RED_UP"); break; case NCSMDS_RED_DOWN: - LOG_ER("NCSMDS_RED_DOWN: SHOULD NOT HAPPEN"); + TRACE("NCSMDS_RED_DOWN"); break; case NCSMDS_CHG_ROLE: - LOG_ER("NCSMDS_CHG_ROLE: SHOULD NOT HAPPEN"); + TRACE("NCSMDS_CHG_ROLE"); + /* Sometimes 
not get NEW_ACTIVE but only CHG_ROLE */ + if (cb->other_immd_id == svc_evt->i_node_id) { + cb->other_immd_id = cb->immd_node_id; + cb->immd_mdest_id = svc_evt->i_dest; + cb->immd_node_id = svc_evt->i_node_id; + } break; default: @@ -687,6 +705,7 @@ static uint32_t immnd_mds_svc_evt(IMMND_CB *cb, evt->info.immnd.info.mds_info.dest = svc_evt->i_dest; evt->info.immnd.info.mds_info.svc_id = svc_evt->i_svc_id; evt->info.immnd.info.mds_info.role = svc_evt->i_role; + evt->info.immnd.info.mds_info.node_id = svc_evt->i_node_id; /* Put it in IMMND's Event Queue */ rc = m_NCS_IPC_SEND(&cb->immnd_mbx, (NCSCONTEXT)evt, priority); diff --git a/src/imm/immnd/immnd_proc.c b/src/imm/immnd/immnd_proc.c index 87b8a728d..1c837360e 100644 --- a/src/imm/immnd/immnd_proc.c +++ b/src/imm/immnd/immnd_proc.c @@ -595,18 +595,15 @@ uint32_t immnd_introduceMe(IMMND_CB *cb) if (cb->mIntroduced == 2) { LOG_NO( - "Re-introduce-me highestProcessed:%llu highestReceived:%llu", - cb->highestProcessed, cb->highestReceived); - send_evt.info.immd.info.ctrl_msg.refresh = 2; - send_evt.info.immd.info.ctrl_msg.fevs_count = - cb->highestReceived; - - send_evt.info.immd.info.ctrl_msg.admo_id_count = - cb->mLatestAdmoId; - ; - send_evt.info.immd.info.ctrl_msg.ccb_id_count = - cb->mLatestCcbId; + "Re-introduce-me highestProcessed:%llu highestReceived:%llu " + "ex_immd_node_id=%x", + cb->highestProcessed, cb->highestReceived, cb->ex_immd_node_id); + send_evt.info.immd.info.ctrl_msg.refresh = 3; + send_evt.info.immd.info.ctrl_msg.fevs_count = cb->highestReceived; + send_evt.info.immd.info.ctrl_msg.admo_id_count = cb->mLatestAdmoId; + send_evt.info.immd.info.ctrl_msg.ccb_id_count = cb->mLatestCcbId; send_evt.info.immd.info.ctrl_msg.impl_count = cb->mLatestImplId; + send_evt.info.immd.info.ctrl_msg.ex_immd_node_id = cb->ex_immd_node_id; } if (!immnd_is_immd_up(cb)) { -- 2.17.1 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net 
https://lists.sourceforge.net/lists/listinfo/opensaf-devel