If a split brain happens and the network merges back, one active SC may stay alive because the other active SCs reboot too fast. Although the surviving SC does not detect the split brain, it still detects that some services are up on the active SCs that are rebooting. In the ticket description, the status of the cluster before the network merges is [[SC-1(ACT), SC-2(STB)], [SC-3(ACT), SC-4, SC-5(STB), SC-6], [SC-7(STB), SC-8(ACT), SC-9], [SC-10(ACT)]]. On SC-1, the ntfa received a wrong NCSMDS_NO_ACTIVE in the following scenario: - Detect that ntfs on active SC-3 is up. - Because ntfs on SC-1 is already active, there is no SVC up event. - Update the active vdest to the new ntfs (on SC-3). - Detect that ntfs on active SC-3 is down. - The active vdest is removed. An NCSMDS_NO_ACTIVE is generated, although ntfs on SC-1 is still active. Solution: don't replace the current active vdest with a new active vdest. When the current active vdest goes down, replace it with another active vdest, if any. --- src/mds/mds_c_api.c | 260 ++++++++++++++++++++++++++++++++++---------- src/mds/mds_c_db.c | 22 ++-- 2 files changed, 213 insertions(+), 69 deletions(-)
diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c index 2a297a1e2..8e2f899a9 100644 --- a/src/mds/mds_c_api.c +++ b/src/mds/mds_c_api.c @@ -2799,21 +2799,16 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, } } - /* Make this as active - */ - mds_subtn_res_tbl_change_active( - local_svc_hdl, - svc_id, - (MDS_VDEST_ID) - vdest_id, - subtn_result_info, - svc_sub_part_ver, - archword_type); - - if ((tmr_running == - true) || - (local_subtn_view == - MDS_VIEW_NORMAL)) { + if (tmr_running == true) { + /* Make this as active */ + mds_subtn_res_tbl_change_active( + local_svc_hdl, + svc_id, + (MDS_VDEST_ID) + vdest_id, + subtn_result_info, + svc_sub_part_ver, + archword_type); /* Call user * callback UP */ @@ -2862,6 +2857,11 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, ->sub_adest_details, svc_sub_part_ver, archword_type); + } else { + /* + * Conflict active entry, + * keep old active entry. + */ } } } @@ -2973,6 +2973,63 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, svc_sub_part_ver, archword_type); } + + status = + mds_subtn_res_tbl_query_next_active( + local_svc_hdl, svc_id, + vdest_id, + subtn_result_info, + &next_active_result_info); + if (status == + NCSCC_RC_SUCCESS) { + /* + * Other active present + * Change active entry + */ + mds_subtn_res_tbl_change_active( + local_svc_hdl, + svc_id, vdest_id, + next_active_result_info, + svc_sub_part_ver, + archword_type); + status = mds_mcm_user_event_callback( + local_svc_hdl, pwe_id, + svc_id, role, vdest_id, 0, + NCSMDS_NEW_ACTIVE, + svc_sub_part_ver, + MDS_SVC_ARCHWORD_TYPE_UNSPECIFIED); + if (status != NCSCC_RC_SUCCESS) { + /* Callback failure */ + m_MDS_LOG_ERR( + "MCM:API: svc_up :" + " NCSMDS_NEW_ACTIVE Callback" + " Failure for svc_id = %s(%d)", + get_svc_names( + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)), + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)); + m_MDS_LEAVE(); + return NCSCC_RC_FAILURE; + } + 
m_MDS_LOG_INFO( + "MCM:API: svc_up : svc_id = %s(%d)" + " on DEST id = %d got NCSMDS_NEW_ACTIVE for" + " svc_id = %s(%d) on Vdest id = %d" + " Adest = %s, rem_svc_pvt_ver=%d", + get_svc_names( + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)), + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl), + m_MDS_GET_VDEST_ID_FROM_SVC_HDL( + local_svc_hdl), + get_svc_names(svc_id), + svc_id, vdest_id, + next_active_result_info + ->sub_adest_details, + svc_sub_part_ver); + } } else { /* Some other entry is active */ @@ -3779,7 +3836,7 @@ uint32_t mds_mcm_svc_down(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, m_MDS_LOG_INFO("MCM:API: svc_down : " "svc_id = %s(%d) on DEST id = %d " "got NO_ACTIVE for svc_id = %s(%d) " - "on Vdest id = %d Adest = %s, rem_svc_pvt_ver=%d", + "on Vdest id = %d Adest = %s, rem_svc_pvt_ver=%d", get_svc_names( m_MDS_GET_SVC_ID_FROM_SVC_HDL(local_svc_hdl)), m_MDS_GET_SVC_ID_FROM_SVC_HDL( @@ -3799,49 +3856,142 @@ uint32_t mds_mcm_svc_down(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, if (active_adest == adest) { if (vdest_policy == NCS_VDEST_TYPE_MxN) { - mds_subtn_res_tbl_remove_active( - local_svc_hdl, svc_id, - vdest_id); + status = + mds_subtn_res_tbl_query_next_active( + local_svc_hdl, svc_id, + vdest_id, + subtn_result_info, + &next_active_result_info); + if (status == + NCSCC_RC_FAILURE) { + /* No other active present */ + mds_subtn_res_tbl_remove_active( + local_svc_hdl, svc_id, + vdest_id); - /* Call user call back with NO - * ACTIVE */ - status = NCSCC_RC_SUCCESS; - status = mds_mcm_user_event_callback( - local_svc_hdl, pwe_id, - svc_id, role, vdest_id, 0, - NCSMDS_NO_ACTIVE, - svc_sub_part_ver, - MDS_SVC_ARCHWORD_TYPE_UNSPECIFIED); + /* Call user call back with NO ACTIVE */ + status = NCSCC_RC_SUCCESS; + status = mds_mcm_user_event_callback( + local_svc_hdl, pwe_id, + svc_id, role, vdest_id, 0, + NCSMDS_NO_ACTIVE, + svc_sub_part_ver, + MDS_SVC_ARCHWORD_TYPE_UNSPECIFIED); - if (status != - NCSCC_RC_SUCCESS) { - /* Callback failure 
*/ - m_MDS_LOG_ERR( - "MCM:API: svc_down : NO_ACTIVE Callback Failure for svc_id = %s(%d)", - get_svc_names( + if (status != + NCSCC_RC_SUCCESS) { + /* Callback failure */ + m_MDS_LOG_ERR( + "MCM:API: svc_down : NO_ACTIVE Callback Failure for svc_id = %s(%d)", + get_svc_names( + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)), + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)); + m_MDS_LEAVE(); + return NCSCC_RC_FAILURE; + } + + m_MDS_LOG_INFO( + "MCM:API: svc_down : svc_id = %s(%d) on DEST id = %d got NO_ACTIVE for " + "svc_id = %s(%d) on Vdest id = %d Adest = %s, rem_svc_pvt_ver=%d", + get_svc_names( m_MDS_GET_SVC_ID_FROM_SVC_HDL( - local_svc_hdl)), - m_MDS_GET_SVC_ID_FROM_SVC_HDL( - local_svc_hdl)); - m_MDS_LEAVE(); - return NCSCC_RC_FAILURE; + local_svc_hdl)), + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl), + m_MDS_GET_VDEST_ID_FROM_SVC_HDL( + local_svc_hdl), + get_svc_names(svc_id), + svc_id, vdest_id, + log_subtn_result_info + ->sub_adest_details, + svc_sub_part_ver); + } else { + status = mds_mcm_user_event_callback( + local_svc_hdl, pwe_id, + svc_id, role, vdest_id, 0, + NCSMDS_NO_ACTIVE, + svc_sub_part_ver, + MDS_SVC_ARCHWORD_TYPE_UNSPECIFIED); + if (status != NCSCC_RC_SUCCESS) { + /* Callback failure */ + m_MDS_LOG_ERR( + "MCM:API: svc_down : NCSMDS_NO_ACTIVE" + " Callback Failure for svc_id = %s(%d)", + get_svc_names( + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)), + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)); + m_MDS_LEAVE(); + return NCSCC_RC_FAILURE; + } + m_MDS_LOG_INFO( + "MCM:API: svc_down : svc_id = %s(%d)" + " on DEST id = %d got NCSMDS_DOWN for" + " svc_id = %s(%d) on Vdest id = %d" + " Adest = %s, rem_svc_pvt_ver=%d", + get_svc_names( + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)), + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl), + m_MDS_GET_VDEST_ID_FROM_SVC_HDL( + local_svc_hdl), + get_svc_names(svc_id), + svc_id, vdest_id, + log_subtn_result_info + ->sub_adest_details, + svc_sub_part_ver); + /* + * Change Active entry + 
*/ + mds_subtn_res_tbl_change_active( + local_svc_hdl, + svc_id, vdest_id, + next_active_result_info, + svc_sub_part_ver, + archword_type); + status = mds_mcm_user_event_callback( + local_svc_hdl, pwe_id, + svc_id, role, vdest_id, 0, + NCSMDS_NEW_ACTIVE, + svc_sub_part_ver, + MDS_SVC_ARCHWORD_TYPE_UNSPECIFIED); + if (status != NCSCC_RC_SUCCESS) { + /* + * Callback failure + */ + m_MDS_LOG_ERR( + "MCM:API: svc_up : NCSMDS_NEW_ACTIVE" + " Callback Failure for svc_id = %s(%d)", + get_svc_names( + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)), + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)); + m_MDS_LEAVE(); + return NCSCC_RC_FAILURE; + } + m_MDS_LOG_INFO( + "MCM:API: svc_up : svc_id = %s(%d)" + " on DEST id = %d got NCSMDS_NEW_ACTIVE for" + " svc_id = %s(%d) on Vdest id = %d" + " Adest = %s, rem_svc_pvt_ver=%d", + get_svc_names( + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl)), + m_MDS_GET_SVC_ID_FROM_SVC_HDL( + local_svc_hdl), + m_MDS_GET_VDEST_ID_FROM_SVC_HDL( + local_svc_hdl), + get_svc_names(svc_id), + svc_id, vdest_id, + next_active_result_info + ->sub_adest_details, + svc_sub_part_ver); } - - m_MDS_LOG_INFO( - "MCM:API: svc_down : svc_id = %s(%d) on DEST id = %d got NO_ACTIVE for " - "svc_id = %s(%d) on Vdest id = %d Adest = %s, rem_svc_pvt_ver=%d", - get_svc_names( - m_MDS_GET_SVC_ID_FROM_SVC_HDL( - local_svc_hdl)), - m_MDS_GET_SVC_ID_FROM_SVC_HDL( - local_svc_hdl), - m_MDS_GET_VDEST_ID_FROM_SVC_HDL( - local_svc_hdl), - get_svc_names(svc_id), - svc_id, vdest_id, - log_subtn_result_info - ->sub_adest_details, - svc_sub_part_ver); { if (adest_exists == false) { diff --git a/src/mds/mds_c_db.c b/src/mds/mds_c_db.c index 337f0cb23..2213c664b 100644 --- a/src/mds/mds_c_db.c +++ b/src/mds/mds_c_db.c @@ -1991,20 +1991,14 @@ uint32_t mds_subtn_res_tbl_add(MDS_SVC_HDL svc_hdl, MDS_SVC_ID subscr_svc_id, .active_route_info ->next_active_in_turn = subtn_res_info; - } else { /* Present entry is Active Entry */ - - if (local_vdest_policy == - NCS_VDEST_TYPE_MxN) { - 
/* Change active to point to - * this active */ - active_subtn_res_info->info - .active_vdest - .active_route_info - ->next_active_in_turn = - subtn_res_info; - } else { - /* Do nothing just add entry */ - } + } else { + /* + * Present entry is Active Entry. + * Do nothing just add entry. + * It is an active entry conflict + * in NCS_VDEST_TYPE_MxN mode, just wait + * for an active entry to go down. + */ } } } else { /* role == V_DEST_RL_STANDBY */ -- 2.17.1 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel