Re: [devel] [PATCH 1/1] amfd: update SI assignment state when SU is added or removed [#2269]
Ack, code review only. Thanks Praveen On 01-Sep-17 9:24 AM, Gary Lee wrote: --- src/amf/amfd/sg.cc | 5 + src/amf/amfd/su.cc | 6 ++ 2 files changed, 11 insertions(+) diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc index 8f3590190..986bb 100644 --- a/src/amf/amfd/sg.cc +++ b/src/amf/amfd/sg.cc @@ -1725,6 +1725,11 @@ void avd_sg_add_su(AVD_SU *su) { }); avd_verify_equal_ranked_su(su->sg_of_su); + + // update any affected SI assignment state (if saAmfSGNumPrefAssignedSUs==0) + for (const auto &si : su->sg_of_su->list_of_si) { +si->update_ass_state(); + } } void avd_sg_constructor(void) { diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc index b091a5bfb..3726a71fb 100644 --- a/src/amf/amfd/su.cc +++ b/src/amf/amfd/su.cc @@ -2103,6 +2103,12 @@ void su_ccb_apply_delete_hdlr(struct CcbUtilOperationData *opdata) { } /*if (AVD_SG_FSM_STABLE == sg->sg_fsm_state) */ done: + + // update any affected SI assignment state (if saAmfSGNumPrefAssignedSUs==0) + for (const auto &si : sg->list_of_si) { +si->update_ass_state(); + } + TRACE_LEAVE(); } -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfd: honor PrefAssignedSU in nway and nway active model during assignments [#2269]
I got confused with previous release strategies. Please push in release branch also. Thanks Praveen On 31-Aug-17 11:48 AM, praveen malviya wrote: I am not seeing any milestone for any maintenance/release branch. There is only one milestone 5.17.10 which is for new release. Thanks Praveen On 31-Aug-17 11:34 AM, Gary Lee wrote: OK - thanks. Should we push it to release as well? On 31/08/17 15:30, praveen malviya wrote: Hi Gary, ack for part2. I think I did not grep on saAmfSGNumPrefInserviceSUs in all files. I have pushed part1. Please push part2. Thanks Praveen On 31-Aug-17 9:50 AM, Gary Lee wrote: Hi Praveen ack, but I think further changes are required (see attachment). Do you think you could push it today? Thanks Gary On 27/07/17 15:36, Praveen wrote: SG attribute saAmfSGNumPrefAssignedSUs is applicable to N-Way and N-Way Active model. AMF is assigning more than saAmfSGNumPrefAssignedSUs in both N-Way and N-Way Active model. Patch fixes this problem. --- src/amf/amfd/sg.cc | 49 -- src/amf/amfd/sg.h | 1 + src/amf/amfd/sg_nway_fsm.cc | 39 + src/amf/amfd/sg_nwayact_fsm.cc | 29 - 4 files changed, 87 insertions(+), 31 deletions(-) diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc index 7bdf52a..8f35901 100644 --- a/src/amf/amfd/sg.cc +++ b/src/amf/amfd/sg.cc @@ -98,7 +98,7 @@ AVD_SG::AVD_SG() saAmfSGAutoAdjust(SA_FALSE), saAmfSGNumPrefActiveSUs(0), saAmfSGNumPrefStandbySUs(0), - saAmfSGNumPrefInserviceSUs(~0), + saAmfSGNumPrefInserviceSUs(0), saAmfSGNumPrefAssignedSUs(0), saAmfSGMaxActiveSIsperSU(0), saAmfSGMaxStandbySIsperSU(0), @@ -978,18 +978,18 @@ static void ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) { sg->saAmfSGNumPrefStandbySUs); } else if (!strcmp(attribute->attrName, "saAmfSGNumPrefInserviceSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefInserviceSUs = ~0; + sg->saAmfSGNumPrefInserviceSUs = 0; //default value for internal use. 
else sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value); TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'", - sg->saAmfSGNumPrefInserviceSUs); + sg->pref_inservice_sus()); } else if (!strcmp(attribute->attrName, "saAmfSGNumPrefAssignedSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefAssignedSUs = sg->saAmfSGNumPrefInserviceSUs; + sg->saAmfSGNumPrefAssignedSUs = 0; //default value for internal use. else sg->saAmfSGNumPrefAssignedSUs = *((SaUint32T *)value); TRACE("Modified saAmfSGNumPrefAssignedSUs is '%u'", - sg->saAmfSGNumPrefAssignedSUs); + sg->pref_assigned_sus()); } else if (!strcmp(attribute->attrName, "saAmfSGMaxActiveSIsperSU")) { if (value_is_deleted) sg->saAmfSGMaxActiveSIsperSU = -1; @@ -1091,11 +1091,11 @@ static void ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) { if (!strcmp(attribute->attrName, "saAmfSGNumPrefInserviceSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefInserviceSUs = ~0; + sg->saAmfSGNumPrefInserviceSUs = 0; else sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value); TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'", - sg->saAmfSGNumPrefInserviceSUs); + sg->pref_inservice_sus()); if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { if (avd_sg_app_su_inst_func(avd_cb, sg) != NCSCC_RC_SUCCESS) { @@ -1256,7 +1256,7 @@ static void sg_app_sg_admin_unlock_inst(AVD_CL_CB *cb, AVD_SG *sg) { (su->saAmfSUPresenceState == SA_AMF_PRESENCE_UNINSTANTIATED)) { if (su->saAmfSUPreInstantiable == true) { if (su->su_on_node->node_state == AVD_AVND_STATE_PRESENT) { - if (su->sg_of_su->saAmfSGNumPrefInserviceSUs > su_try_inst) { + if (su->sg_of_su->pref_inservice_sus() > su_try_inst) { if (avd_snd_presence_msg(cb, su, false) != NCSCC_RC_SUCCESS) { LOG_NO("%s: Failed to send Instantiation order of '%s' to %x", __FUNCTION__, su->name.c_str(), @@ -1944,19 +1944,6 @@ void avd_sg_adjust_config(AVD_SG *sg) {
Re: [devel] [PATCH 1/1] amfd: honor PrefAssignedSU in nway and nway active model during assignments [#2269]
I am not seeing any milestone for any maintenance/release branch. There is only one milestone 5.17.10 which is for new release. Thanks Praveen On 31-Aug-17 11:34 AM, Gary Lee wrote: OK - thanks. Should we push it to release as well? On 31/08/17 15:30, praveen malviya wrote: Hi Gary, ack for part2. I think I did not grep on saAmfSGNumPrefInserviceSUs in all files. I have pushed part1. Please push part2. Thanks Praveen On 31-Aug-17 9:50 AM, Gary Lee wrote: Hi Praveen ack, but I think further changes are required (see attachment). Do you think you could push it today? Thanks Gary On 27/07/17 15:36, Praveen wrote: SG attribute saAmfSGNumPrefAssignedSUs is applicable to N-Way and N-Way Active model. AMF is assigning more than saAmfSGNumPrefAssignedSUs in both N-Way and N-Way Active model. Patch fixes this problem. --- src/amf/amfd/sg.cc | 49 -- src/amf/amfd/sg.h | 1 + src/amf/amfd/sg_nway_fsm.cc | 39 + src/amf/amfd/sg_nwayact_fsm.cc | 29 - 4 files changed, 87 insertions(+), 31 deletions(-) diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc index 7bdf52a..8f35901 100644 --- a/src/amf/amfd/sg.cc +++ b/src/amf/amfd/sg.cc @@ -98,7 +98,7 @@ AVD_SG::AVD_SG() saAmfSGAutoAdjust(SA_FALSE), saAmfSGNumPrefActiveSUs(0), saAmfSGNumPrefStandbySUs(0), - saAmfSGNumPrefInserviceSUs(~0), + saAmfSGNumPrefInserviceSUs(0), saAmfSGNumPrefAssignedSUs(0), saAmfSGMaxActiveSIsperSU(0), saAmfSGMaxStandbySIsperSU(0), @@ -978,18 +978,18 @@ static void ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) { sg->saAmfSGNumPrefStandbySUs); } else if (!strcmp(attribute->attrName, "saAmfSGNumPrefInserviceSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefInserviceSUs = ~0; + sg->saAmfSGNumPrefInserviceSUs = 0; //default value for internal use. 
else sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value); TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'", - sg->saAmfSGNumPrefInserviceSUs); + sg->pref_inservice_sus()); } else if (!strcmp(attribute->attrName, "saAmfSGNumPrefAssignedSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefAssignedSUs = sg->saAmfSGNumPrefInserviceSUs; + sg->saAmfSGNumPrefAssignedSUs = 0; //default value for internal use. else sg->saAmfSGNumPrefAssignedSUs = *((SaUint32T *)value); TRACE("Modified saAmfSGNumPrefAssignedSUs is '%u'", - sg->saAmfSGNumPrefAssignedSUs); + sg->pref_assigned_sus()); } else if (!strcmp(attribute->attrName, "saAmfSGMaxActiveSIsperSU")) { if (value_is_deleted) sg->saAmfSGMaxActiveSIsperSU = -1; @@ -1091,11 +1091,11 @@ static void ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) { if (!strcmp(attribute->attrName, "saAmfSGNumPrefInserviceSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefInserviceSUs = ~0; + sg->saAmfSGNumPrefInserviceSUs = 0; else sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value); TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'", - sg->saAmfSGNumPrefInserviceSUs); + sg->pref_inservice_sus()); if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { if (avd_sg_app_su_inst_func(avd_cb, sg) != NCSCC_RC_SUCCESS) { @@ -1256,7 +1256,7 @@ static void sg_app_sg_admin_unlock_inst(AVD_CL_CB *cb, AVD_SG *sg) { (su->saAmfSUPresenceState == SA_AMF_PRESENCE_UNINSTANTIATED)) { if (su->saAmfSUPreInstantiable == true) { if (su->su_on_node->node_state == AVD_AVND_STATE_PRESENT) { - if (su->sg_of_su->saAmfSGNumPrefInserviceSUs > su_try_inst) { + if (su->sg_of_su->pref_inservice_sus() > su_try_inst) { if (avd_snd_presence_msg(cb, su, false) != NCSCC_RC_SUCCESS) { LOG_NO("%s: Failed to send Instantiation order of '%s' to %x", __FUNCTION__, su->name.c_str(), @@ -1944,19 +1944,6 @@ void avd_sg_adjust_config(AVD_SG *sg) { } } } - - /* adjust saAmfSGNumPrefAssignedSUs if not configured, only applicable for - * the N-way and N-way active redundancy models - */ 
- if ((sg->saAmfSGNumPrefAssignedSUs == 0) && - ((sg->sg_type->saAmfSgtRedundancyModel == - SA_AMF_N_WAY_REDUNDANCY_MODEL) || - (sg->sg_type->saAmfSgtRedundancyModel == - SA_AMF_N_WAY_ACTIVE_REDUNDANCY_MODEL))) { - sg->saAmfSGNumPrefAssignedSUs = sg->saAmfSGNu
Re: [devel] [PATCH 1/1] amfd: honor PrefAssignedSU in nway and nway active model during assignments [#2269]
Hi Gary, ack for part2. I think I did not grep on saAmfSGNumPrefInserviceSUs in all files. I have pushed part1. Please push part2. Thanks Praveen On 31-Aug-17 9:50 AM, Gary Lee wrote: Hi Praveen ack, but I think further changes are required (see attachment). Do you think you could push it today? Thanks Gary On 27/07/17 15:36, Praveen wrote: SG attribute saAmfSGNumPrefAssignedSUs is applicable to N-Way and N-Way Active model. AMF is assigning more than saAmfSGNumPrefAssignedSUs in both N-Way and N-Way Active model. Patch fixes this problem. --- src/amf/amfd/sg.cc | 49 -- src/amf/amfd/sg.h | 1 + src/amf/amfd/sg_nway_fsm.cc | 39 + src/amf/amfd/sg_nwayact_fsm.cc | 29 - 4 files changed, 87 insertions(+), 31 deletions(-) diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc index 7bdf52a..8f35901 100644 --- a/src/amf/amfd/sg.cc +++ b/src/amf/amfd/sg.cc @@ -98,7 +98,7 @@ AVD_SG::AVD_SG() saAmfSGAutoAdjust(SA_FALSE), saAmfSGNumPrefActiveSUs(0), saAmfSGNumPrefStandbySUs(0), - saAmfSGNumPrefInserviceSUs(~0), + saAmfSGNumPrefInserviceSUs(0), saAmfSGNumPrefAssignedSUs(0), saAmfSGMaxActiveSIsperSU(0), saAmfSGMaxStandbySIsperSU(0), @@ -978,18 +978,18 @@ static void ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) { sg->saAmfSGNumPrefStandbySUs); } else if (!strcmp(attribute->attrName, "saAmfSGNumPrefInserviceSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefInserviceSUs = ~0; + sg->saAmfSGNumPrefInserviceSUs = 0; //default value for internal use. else sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value); TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'", - sg->saAmfSGNumPrefInserviceSUs); + sg->pref_inservice_sus()); } else if (!strcmp(attribute->attrName, "saAmfSGNumPrefAssignedSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefAssignedSUs = sg->saAmfSGNumPrefInserviceSUs; + sg->saAmfSGNumPrefAssignedSUs = 0; //default value for internal use. 
else sg->saAmfSGNumPrefAssignedSUs = *((SaUint32T *)value); TRACE("Modified saAmfSGNumPrefAssignedSUs is '%u'", - sg->saAmfSGNumPrefAssignedSUs); + sg->pref_assigned_sus()); } else if (!strcmp(attribute->attrName, "saAmfSGMaxActiveSIsperSU")) { if (value_is_deleted) sg->saAmfSGMaxActiveSIsperSU = -1; @@ -1091,11 +1091,11 @@ static void ccb_apply_modify_hdlr(CcbUtilOperationData_t *opdata) { if (!strcmp(attribute->attrName, "saAmfSGNumPrefInserviceSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefInserviceSUs = ~0; + sg->saAmfSGNumPrefInserviceSUs = 0; else sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value); TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'", - sg->saAmfSGNumPrefInserviceSUs); + sg->pref_inservice_sus()); if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { if (avd_sg_app_su_inst_func(avd_cb, sg) != NCSCC_RC_SUCCESS) { @@ -1256,7 +1256,7 @@ static void sg_app_sg_admin_unlock_inst(AVD_CL_CB *cb, AVD_SG *sg) { (su->saAmfSUPresenceState == SA_AMF_PRESENCE_UNINSTANTIATED)) { if (su->saAmfSUPreInstantiable == true) { if (su->su_on_node->node_state == AVD_AVND_STATE_PRESENT) { - if (su->sg_of_su->saAmfSGNumPrefInserviceSUs > su_try_inst) { + if (su->sg_of_su->pref_inservice_sus() > su_try_inst) { if (avd_snd_presence_msg(cb, su, false) != NCSCC_RC_SUCCESS) { LOG_NO("%s: Failed to send Instantiation order of '%s' to %x", __FUNCTION__, su->name.c_str(), @@ -1944,19 +1944,6 @@ void avd_sg_adjust_config(AVD_SG *sg) { } } } - - /* adjust saAmfSGNumPrefAssignedSUs if not configured, only applicable for - * the N-way and N-way active redundancy models - */ - if ((sg->saAmfSGNumPrefAssignedSUs == 0) && - ((sg->sg_type->saAmfSgtRedundancyModel == - SA_AMF_N_WAY_REDUNDANCY_MODEL) || - (sg->sg_type->saAmfSgtRedundancyModel == - SA_AMF_N_WAY_ACTIVE_REDUNDANCY_MODEL))) { - sg->saAmfSGNumPrefAssignedSUs = sg->saAmfSGNumPrefInserviceSUs; - LOG_NO("'%s' saAmfSGNumPrefAssignedSUs adjusted to %u", sg->name.c_str(), - sg->saAmfSGNumPrefAssignedSUs); - } } /** @@ -1972,7 
+1959,7 @@ uint32_t sg_instantiated_su_count(const AVD_SG *sg) { for (const auto &su : sg->list_of_su) { TRACE_1("su'%s', pres state'%u', in_serv'%u', PrefIn'%u'", su->name.c_str(), su->saAmfSUPresenceState, su->saAmfSuReadinessState, - sg->saAmfSGNumPrefInserviceSUs); + sg->pref_inservice_sus()); if (((su->saAmfSU
Re: [devel] [PATCH 1/1] amfd: postpone deletion of node from node_id_db [#2547]
Ack, code review only. Thanks, Praveen On 14-Aug-17 9:05 AM, Gary Lee wrote: CLM and MDS callbacks are delivered to the main thread via different paths. If a node is restarted quickly, sometimes CLM JOIN is processed before the prior MDS down. This means the node will not be able to join the cluster as it is not in node_id_db (deleted in MDS down processing). This patch ensures addition to, and removal from node_id_db is only done from CLM callbacks to avoid race conditions such as above. --- src/amf/amfd/clm.cc| 10 -- src/amf/amfd/ndfsm.cc | 1 + src/amf/amfd/ndproc.cc | 2 +- src/amf/amfd/node.cc | 1 + src/amf/amfd/node.h| 1 + 5 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc index da951d223..b2133b57e 100644 --- a/src/amf/amfd/clm.cc +++ b/src/amf/amfd/clm.cc @@ -203,6 +203,7 @@ static void clm_node_exit_complete(SaClmNodeIdT nodeId) { } avd_node_failover(node); + avd_node_delete_nodeid(node); m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node, AVSV_CKPT_AVD_NODE_CONFIG); node->clm_change_start_preceded = false; @@ -246,7 +247,7 @@ static void clm_track_cb( case SA_CLM_CHANGE_VALIDATE: if (notifItem->clusterChange == SA_CLM_NODE_LEFT) { node = avd_node_find_nodeid(notifItem->clusterNode.nodeId); - if (node == nullptr) { + if (node == nullptr || node->node_up == false) { LOG_IN("%s: CLM node '%s' is not an AMF cluster member", __FUNCTION__, node_name.c_str()); goto done; @@ -262,7 +263,7 @@ static void clm_track_cb( case SA_CLM_CHANGE_START: node = avd_node_find_nodeid(notifItem->clusterNode.nodeId); -if (node == nullptr) { +if (node == nullptr || node->node_up == false) { LOG_IN("%s: CLM node '%s' is not an AMF cluster member", __FUNCTION__, node_name.c_str()); goto done; @@ -293,6 +294,11 @@ static void clm_track_cb( LOG_IN("%s: CLM node '%s' is not an AMF cluster member", __FUNCTION__, node_name.c_str()); goto done; + } else if (node->node_up == false) { +LOG_IN("%s: CLM node '%s' is not an AMF cluster member; 
MDS down received", + __FUNCTION__, node_name.c_str()); +avd_node_delete_nodeid(node); +goto done; } TRACE(" Node Left: rootCauseEntity %s for node %u", osaf_extended_name_borrow(rootCauseEntity), diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc index ca2e3f698..223f57f20 100644 --- a/src/amf/amfd/ndfsm.cc +++ b/src/amf/amfd/ndfsm.cc @@ -247,6 +247,7 @@ void record_node_up_msg_info(AVD_AVND *avnd, const AVD_DND_MSG *n2d_msg) { osafassert(avnd != nullptr); avnd->adest = n2d_msg->msg_info.n2d_node_up.adest_address; + avnd->node_up = true; if (n2d_msg->msg_info.n2d_node_up.msg_id >= avnd->rcv_msg_id) { LOG_NO("Received node_up from %x: msg_id %u", diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc index e80a0b3b8..2edb9b16e 100644 --- a/src/amf/amfd/ndproc.cc +++ b/src/amf/amfd/ndproc.cc @@ -1221,6 +1221,6 @@ void avd_node_failover(AVD_AVND *node) { avd_pg_node_csi_del_all(avd_cb, node); avd_node_down_mw_susi_failover(avd_cb, node); avd_node_down_appl_susi_failover(avd_cb, node); - avd_node_delete_nodeid(node); + node->node_up = false; // postpone deletion from node_id_db TRACE_LEAVE(); } diff --git a/src/amf/amfd/node.cc b/src/amf/amfd/node.cc index 37f6ee389..8390515b4 100644 --- a/src/amf/amfd/node.cc +++ b/src/amf/amfd/node.cc @@ -120,6 +120,7 @@ void AVD_AVND::initialize() { clm_change_start_preceded = {}; recvr_fail_sw = {}; admin_ng = {}; + node_up = false; } // diff --git a/src/amf/amfd/node.h b/src/amf/amfd/node.h index e64bf8c93..4cee956cc 100644 --- a/src/amf/amfd/node.h +++ b/src/amf/amfd/node.h @@ -148,6 +148,7 @@ class AVD_AVND { bool is_campaign_set_for_all_sus() const; // Member functions. void node_sus_termstate_set(bool term_state) const; + bool node_up; // true if MDS is up, false if MDS is down private: void initialize(); -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! 
http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfnd: convert dnd_list to a vector [#1945]
Hi Gary, Ack code review only. I have no further comments. Please go ahead and push the patch with that correction at 2 places. Thanks Praveen On 18-Aug-17 5:43 AM, Gary Lee wrote: Hi Praveen Yes, thanks for picking that up. I will send another patch. Thanks Gary On 17/8/17, 6:24 pm, "praveen malviya" wrote: Hi Gary, Please find one comment inline with [Praveen]. Thanks Praveen On 05-Jul-17 2:15 PM, Gary Lee wrote: > --- > src/amf/amfnd/avnd_cb.h | 3 +- > src/amf/amfnd/avnd_di.h | 36 -- > src/amf/amfnd/avnd_mds.h | 11 +- > src/amf/amfnd/di.cc | 321 +-- > src/amf/amfnd/proxy.cc | 19 ++- > src/amf/amfnd/verify.cc | 14 +-- > 6 files changed, 167 insertions(+), 237 deletions(-) > > diff --git a/src/amf/amfnd/avnd_cb.h b/src/amf/amfnd/avnd_cb.h > index 2d642c752..ff21e3108 100644 > --- a/src/amf/amfnd/avnd_cb.h > +++ b/src/amf/amfnd/avnd_cb.h > @@ -33,6 +33,7 @@ > #ifndef AMF_AMFND_AVND_CB_H_ > #define AMF_AMFND_AVND_CB_H_ > #include > +#include > > typedef struct avnd_cb_tag { > SYSF_MBX mbx; /* mailbox on which AvND waits */ > @@ -99,7 +100,7 @@ typedef struct avnd_cb_tag { > * Messages are removed when acked with the ACK message. > * At director failover the list is scanned handling the > * VERIFY message from the director and possibly resent again */ > - AVND_DND_LIST dnd_list; > + std::vector dnd_list; > > AVND_TERM_STATE term_state; > AVND_LED_STATE led_state; > diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h > index d7ccd68fd..9870ad774 100644 > --- a/src/amf/amfnd/avnd_di.h > +++ b/src/amf/amfnd/avnd_di.h > @@ -33,42 +33,6 @@ > > #include "amf/common/amf_si_assign.h" > > -/* macro to find the matching record (based on the msg-id) */ > -/* > - * Caution!!! It is assumed that the msg-id is the 1st element in the message > - * structure. Ensure it. Else move the msg id to the common portion of the > - * message structure (outside the msg type specific contents). 
> - */ > -#define m_AVND_DIQ_REC_FIND(cb, mid, o_rec) \ > - { \ > -AVND_DND_LIST *list = &((cb)->dnd_list); \ > -for ((o_rec) = list->head; \ > - (o_rec) && \ > - !(*((uint32_t *)(&((o_rec)->msg.info.avd->msg_info))) == (mid)); \ > - (o_rec) = (o_rec)->next) \ > - ; \ > - } > - > -/* macro to find & pop a given record */ > -#define m_AVND_DIQ_REC_FIND_POP(cb, rec) \ > - { \ > -AVND_DND_LIST *list = &((cb)->dnd_list); \ > -AVND_DND_MSG_LIST *prv = list->head, *curr; \ > -for (curr = list->head; curr && !(curr == (rec));\ > - prv = curr, curr = curr->next) \ > - ; \ > -if (curr) { \ > - if (curr == list->head) { \ > -list->head = curr->next; \ > -if (list->tail == curr) list->tail = list->head; \ > - } else { \ > -prv->next = curr->next; \ > -if (list->tail == curr) list->tail = prv;\ > - } \ > - curr->next = 0;\ > -}\ > - } > - > struct avnd_cb_tag; > > uint32_t avnd_di_oper_send(struct avnd_cb_tag *, const AVND_SU *, uint32_t); > diff --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h > index 70173acaa..8c81f7bb1 100644 > --- a/src/amf/amfnd/avnd_mds.h > +++ b/src/amf/amfnd/avnd_mds.h > @@ -63,18 +63,13 @@ typedef struct avnd_msg { > } info;
Re: [devel] [PATCH 1/1] amfnd: convert dnd_list to a vector [#1945]
Hi Gary, Please find one comment inline with [Praveen]. Thanks Praveen On 05-Jul-17 2:15 PM, Gary Lee wrote: --- src/amf/amfnd/avnd_cb.h | 3 +- src/amf/amfnd/avnd_di.h | 36 -- src/amf/amfnd/avnd_mds.h | 11 +- src/amf/amfnd/di.cc | 321 +-- src/amf/amfnd/proxy.cc | 19 ++- src/amf/amfnd/verify.cc | 14 +-- 6 files changed, 167 insertions(+), 237 deletions(-) diff --git a/src/amf/amfnd/avnd_cb.h b/src/amf/amfnd/avnd_cb.h index 2d642c752..ff21e3108 100644 --- a/src/amf/amfnd/avnd_cb.h +++ b/src/amf/amfnd/avnd_cb.h @@ -33,6 +33,7 @@ #ifndef AMF_AMFND_AVND_CB_H_ #define AMF_AMFND_AVND_CB_H_ #include +#include typedef struct avnd_cb_tag { SYSF_MBX mbx; /* mailbox on which AvND waits */ @@ -99,7 +100,7 @@ typedef struct avnd_cb_tag { * Messages are removed when acked with the ACK message. * At director failover the list is scanned handling the * VERIFY message from the director and possibly resent again */ - AVND_DND_LIST dnd_list; + std::vector dnd_list; AVND_TERM_STATE term_state; AVND_LED_STATE led_state; diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h index d7ccd68fd..9870ad774 100644 --- a/src/amf/amfnd/avnd_di.h +++ b/src/amf/amfnd/avnd_di.h @@ -33,42 +33,6 @@ #include "amf/common/amf_si_assign.h" -/* macro to find the matching record (based on the msg-id) */ -/* - * Caution!!! It is assumed that the msg-id is the 1st element in the message - * structure. Ensure it. Else move the msg id to the common portion of the - * message structure (outside the msg type specific contents). 
- */ -#define m_AVND_DIQ_REC_FIND(cb, mid, o_rec) \ - { \ -AVND_DND_LIST *list = &((cb)->dnd_list); \ -for ((o_rec) = list->head;\ - (o_rec) && \ - !(*((uint32_t *)(&((o_rec)->msg.info.avd->msg_info))) == (mid)); \ - (o_rec) = (o_rec)->next) \ - ; \ - } - -/* macro to find & pop a given record */ -#define m_AVND_DIQ_REC_FIND_POP(cb, rec) \ - { \ -AVND_DND_LIST *list = &((cb)->dnd_list); \ -AVND_DND_MSG_LIST *prv = list->head, *curr; \ -for (curr = list->head; curr && !(curr == (rec));\ - prv = curr, curr = curr->next) \ - ; \ -if (curr) { \ - if (curr == list->head) { \ -list->head = curr->next; \ -if (list->tail == curr) list->tail = list->head; \ - } else { \ -prv->next = curr->next; \ -if (list->tail == curr) list->tail = prv;\ - } \ - curr->next = 0;\ -}\ - } - struct avnd_cb_tag; uint32_t avnd_di_oper_send(struct avnd_cb_tag *, const AVND_SU *, uint32_t); diff --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h index 70173acaa..8c81f7bb1 100644 --- a/src/amf/amfnd/avnd_mds.h +++ b/src/amf/amfnd/avnd_mds.h @@ -63,18 +63,13 @@ typedef struct avnd_msg { } info; } AVND_MSG; -typedef struct avnd_dnd_msg_list_tag { +class AVND_DND_MSG_LIST { +public: AVND_MSG msg; AVND_TMR resp_tmr; uint32_t opq_hdl; uint16_t no_retries; - struct avnd_dnd_msg_list_tag *next; -} AVND_DND_MSG_LIST; - -typedef struct avnd_dnd_list_tag { - AVND_DND_MSG_LIST *head; - AVND_DND_MSG_LIST *tail; -} AVND_DND_LIST; +}; /* Macros to fill the MDS message structure diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc index 6f0a76cda..93350e62a 100644 --- a/src/amf/amfnd/di.cc +++ b/src/amf/amfnd/di.cc @@ -39,30 +39,7 @@ #include "base/logtrace.h" #include "amf/amfnd/avnd.h" - -/* macro to push the AvD msg parameters (to the end of the list) */ -#define m_AVND_DIQ_REC_PUSH(cb, rec) \ - { \ -AVND_DND_LIST *list = &((cb)->dnd_list); \ -if (!(list->head)) \ - list->head = (rec);\ -else \ - list->tail->next = (rec); \ -list->tail = (rec); \ - } - -/* macro to pop the record (from the 
beginning of the list) */ -#define m_AVND_DIQ_REC_POP(cb, o_rec)\ - {
Re: [devel] [PATCH 1/1] amfnd: convert dnd_list to a vector [#1945]
Hi Gary, I have started reviewing it. Thanks Praveen On 15-Aug-17 7:02 AM, Gary Lee wrote: Hi I would like to push this in a week's time if no one has comments. Thanks Gary On 05/07/17 18:45, Gary Lee wrote: --- src/amf/amfnd/avnd_cb.h | 3 +- src/amf/amfnd/avnd_di.h | 36 -- src/amf/amfnd/avnd_mds.h | 11 +- src/amf/amfnd/di.cc | 321 +-- src/amf/amfnd/proxy.cc | 19 ++- src/amf/amfnd/verify.cc | 14 +-- 6 files changed, 167 insertions(+), 237 deletions(-) diff --git a/src/amf/amfnd/avnd_cb.h b/src/amf/amfnd/avnd_cb.h index 2d642c752..ff21e3108 100644 --- a/src/amf/amfnd/avnd_cb.h +++ b/src/amf/amfnd/avnd_cb.h @@ -33,6 +33,7 @@ #ifndef AMF_AMFND_AVND_CB_H_ #define AMF_AMFND_AVND_CB_H_ #include +#include typedef struct avnd_cb_tag { SYSF_MBX mbx; /* mailbox on which AvND waits */ @@ -99,7 +100,7 @@ typedef struct avnd_cb_tag { * Messages are removed when acked with the ACK message. * At director failover the list is scanned handling the * VERIFY message from the director and possibly resent again */ - AVND_DND_LIST dnd_list; + std::vector dnd_list; AVND_TERM_STATE term_state; AVND_LED_STATE led_state; diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h index d7ccd68fd..9870ad774 100644 --- a/src/amf/amfnd/avnd_di.h +++ b/src/amf/amfnd/avnd_di.h @@ -33,42 +33,6 @@ #include "amf/common/amf_si_assign.h" -/* macro to find the matching record (based on the msg-id) */ -/* - * Caution!!! It is assumed that the msg-id is the 1st element in the message - * structure. Ensure it. Else move the msg id to the common portion of the - * message structure (outside the msg type specific contents). 
- */ -#define m_AVND_DIQ_REC_FIND(cb, mid, o_rec) \ - { \ -AVND_DND_LIST *list = &((cb)->dnd_list); \ -for ((o_rec) = list->head;\ - (o_rec) && \ - !(*((uint32_t *)(&((o_rec)->msg.info.avd->msg_info))) == (mid)); \ - (o_rec) = (o_rec)->next) \ - ; \ - } - -/* macro to find & pop a given record */ -#define m_AVND_DIQ_REC_FIND_POP(cb, rec) \ - { \ -AVND_DND_LIST *list = &((cb)->dnd_list); \ -AVND_DND_MSG_LIST *prv = list->head, *curr; \ -for (curr = list->head; curr && !(curr == (rec));\ - prv = curr, curr = curr->next) \ - ; \ -if (curr) { \ - if (curr == list->head) { \ -list->head = curr->next; \ -if (list->tail == curr) list->tail = list->head; \ - } else { \ -prv->next = curr->next; \ -if (list->tail == curr) list->tail = prv;\ - } \ - curr->next = 0;\ -}\ - } - struct avnd_cb_tag; uint32_t avnd_di_oper_send(struct avnd_cb_tag *, const AVND_SU *, uint32_t); diff --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h index 70173acaa..8c81f7bb1 100644 --- a/src/amf/amfnd/avnd_mds.h +++ b/src/amf/amfnd/avnd_mds.h @@ -63,18 +63,13 @@ typedef struct avnd_msg { } info; } AVND_MSG; -typedef struct avnd_dnd_msg_list_tag { +class AVND_DND_MSG_LIST { +public: AVND_MSG msg; AVND_TMR resp_tmr; uint32_t opq_hdl; uint16_t no_retries; - struct avnd_dnd_msg_list_tag *next; -} AVND_DND_MSG_LIST; - -typedef struct avnd_dnd_list_tag { - AVND_DND_MSG_LIST *head; - AVND_DND_MSG_LIST *tail; -} AVND_DND_LIST; +}; /* Macros to fill the MDS message structure diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc index 6f0a76cda..93350e62a 100644 --- a/src/amf/amfnd/di.cc +++ b/src/amf/amfnd/di.cc @@ -39,30 +39,7 @@ #include "base/logtrace.h" #include "amf/amfnd/avnd.h" - -/* macro to push the AvD msg parameters (to the end of the list) */ -#define m_AVND_DIQ_REC_PUSH(cb, rec) \ - { \ -AVND_DND_LIST *list = &((cb)->dnd_list); \ -if (!(list->head)) \ - list->head = (rec);\ -else \ - list->tail->next = (rec); \ -list->tail = (rec); \ - } - -/* macro to pop the record (from the be
Re: [devel] Review Request for amf: update PR doc compliance report for saAmfComponentErrorClear_4() [#2540]
Ack. Thanks Praveen On 16-Aug-17 1:56 PM, Nguyen Luu wrote: Summary: amf: update PR doc compliance report for saAmfComponentErrorClear_4() [#2540] Review request for Trac Ticket(s): #2540 Peer Reviewer(s): AMF devs Pull request to: AMF maintainers Affected branch(es): default Development branch: default Impacted area Impact y/n Docs y Build system n RPM/packaging n Configuration files n Startup scripts n SAF services n OpenSAF services n Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): - amf: update PR doc compliance report for saAmfComponentErrorClear_4() [#2540] Conditions of Submission: - Ack from reviewers Arch Built Started Linux distro --- mips n n mips64 n n x86 n n x86_64 n n powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. 
___ You need to refactor your submission into logical chunks; there is too much content in a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multiple times without a clear indication of what has changed between each re-send. ___ You have failed to adequately and individually address all of the comments and change requests that were proposed in the initial review. ___ You have a misconfigured ~/.hgrc file (i.e. username, email etc) ___ Your computer has a badly configured date and time; confusing the threaded patch review. ___ Your changes affect IPC mechanism, and you don't present any results for in-service upgradability test. ___ Your changes affect user manual and documentation, your patch series does not contain the patch that updates the Doxygen manual -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfa: Fix saAmfComponentErrorClear_4 to return ERR_NOT_EXIST for non-exist comp [#2540]
Ack, code review only. Thanks Praveen On 16-Aug-17 1:44 PM, Nguyen Luu wrote: When called with a non-existing component name, saAmfComponentErrorClear_4 should return SA_AIS_ERR_NOT_EXIST instead of SA_AIS_ERR_BAD_OPERATION as previously done. --- src/amf/amfnd/err.cc | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc index e4cb9f0..a0529b9 100644 --- a/src/amf/amfnd/err.cc +++ b/src/amf/amfnd/err.cc @@ -2,6 +2,7 @@ * * (C) Copyright 2008 The OpenSAF Foundation * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (C) 2017, Ericsson AB. All rights reserved. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY @@ -13,6 +14,7 @@ * licensing terms. * * Author(s): Emerson Network Power + *Ericsson * */ @@ -164,11 +166,12 @@ uint32_t avnd_evt_ava_err_rep_evh(AVND_CB *cb, AVND_EVT *evt) { } } - /* get the comp */ + /* check if component exists on local AvND node */ comp = avnd_compdb_rec_get(cb->compdb, Amf::to_string(&err_rep->err_comp)); - /* determine the error code, if any */ if (!comp) amf_rc = SA_AIS_ERR_NOT_EXIST; + /* determine other error codes, if any */ + /* We need not entertain errors when comp is not in shape */ if (comp && (m_AVND_COMP_PRES_STATE_IS_UNINSTANTIATED(comp) || m_AVND_COMP_PRES_STATE_IS_INSTANTIATIONFAILED(comp) || @@ -265,13 +268,15 @@ uint32_t avnd_evt_ava_err_clear_evh(AVND_CB *cb, AVND_EVT *evt) { } } - /* get the comp */ + /* check if component exists on local AvND node */ comp = avnd_compdb_rec_get(cb->compdb, Amf::to_string(&err_clear->comp_name)); + if (!comp) amf_rc = SA_AIS_ERR_NOT_EXIST; + + /* determine other error codes, if any */ - /* determine the error code, if any */ - if (!comp || !m_AVND_COMP_IS_REG(comp) || + if ((comp) && (!m_AVND_COMP_IS_REG(comp) || (!m_AVND_COMP_TYPE_IS_PREINSTANTIABLE(comp) && - 
!m_AVND_COMP_TYPE_IS_PROXIED(comp))) + !m_AVND_COMP_TYPE_IS_PROXIED(comp)))) amf_rc = SA_AIS_ERR_BAD_OPERATION; if ((comp) && m_AVND_COMP_OPER_STATE_IS_ENABLED(comp)) -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfa: Fix saAmfComponentErrorClear_4 to return ERR_NOT_EXIST for non-exist comp [#2540]
Hi, Both ErrorReport() and ErrorClear() APIs can be called for any component hosted anywhere. Since amfnd only maintains local components, component may not be found in its data base. A message should be sent to AMFD or AMFND should instantly read IMM database for validating the component. I am ok, if this patch is pushed by documenting this limitation for both ErrorReport() and ErrorClear() APIs. Thanks Praveen On 02-Aug-17 12:51 PM, Nguyen Luu wrote: When called with a non-existing component name, saAmfComponentErrorClear_4 should return SA_AIS_ERR_NOT_EXIST instead of SA_AIS_ERR_BAD_OPERATION as previously done. --- src/amf/amfnd/err.cc | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc index e4cb9f0..65c54f5 100644 --- a/src/amf/amfnd/err.cc +++ b/src/amf/amfnd/err.cc @@ -269,9 +269,11 @@ uint32_t avnd_evt_ava_err_clear_evh(AVND_CB *cb, AVND_EVT *evt) { comp = avnd_compdb_rec_get(cb->compdb, Amf::to_string(&err_clear->comp_name)); /* determine the error code, if any */ - if (!comp || !m_AVND_COMP_IS_REG(comp) || + if (!comp) amf_rc = SA_AIS_ERR_NOT_EXIST; + + if ((comp) && (!m_AVND_COMP_IS_REG(comp) || (!m_AVND_COMP_TYPE_IS_PREINSTANTIABLE(comp) && - !m_AVND_COMP_TYPE_IS_PROXIED(comp))) + !m_AVND_COMP_TYPE_IS_PROXIED(comp)))) amf_rc = SA_AIS_ERR_BAD_OPERATION; if ((comp) && m_AVND_COMP_OPER_STATE_IS_ENABLED(comp)) -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfa: Fix saAmfPmStart_3 and saAmfResponse_4 to correctly return BAD_HANDLE [#2539]
Ack, code review only. Thanks Praveen On 02-Aug-17 9:31 AM, Nguyen Luu wrote: When called with an uninitialized or already finalized handle, saAmfPmStart_3 and saAmfResponse_4 should return SA_AIS_ERR_BAD_HANDLE instead of SA_AIS_ERR_VERSION as previously done. --- src/amf/agent/amf_agent.cc | 14 ++ 1 file changed, 14 insertions(+) diff --git a/src/amf/agent/amf_agent.cc b/src/amf/agent/amf_agent.cc index 20528e9..b9191dd 100644 --- a/src/amf/agent/amf_agent.cc +++ b/src/amf/agent/amf_agent.cc @@ -2296,6 +2296,13 @@ SaAisErrorT AmfAgent::PmStart_3(SaAmfHandleT hdl, const SaNameT *comp_name, SaAisErrorT rc = SA_AIS_OK; TRACE_ENTER2("SaAmfHandleT passed is %llx", hdl); + /* Verifying the input Handle & global handle */ + if (!gl_ava_hdl || hdl > AVSV_UNS32_HDL_MAX) { +TRACE_2("Invalid SaAmfHandle passed by component: %llx", hdl); +rc = SA_AIS_ERR_BAD_HANDLE; +goto done; + } + /* Version is previously set in in initialize function */ if (!ava_B4_ver_used(0)) { TRACE_2( @@ -2844,6 +2851,13 @@ SaAisErrorT AmfAgent::Response_4(SaAmfHandleT hdl, SaInvocationT inv, SaAisErrorT rc = SA_AIS_OK; TRACE_ENTER2("SaAmfHandleT passed is %llx", hdl); + /* Verifying the input Handle & global handle */ + if (!gl_ava_hdl || hdl > AVSV_UNS32_HDL_MAX) { +TRACE_2("Invalid SaAmfHandle passed by component: %llx", hdl); +rc = SA_AIS_ERR_BAD_HANDLE; +goto done; + } + /* Version is previously set in in initialize function */ if (!ava_B4_ver_used(0)) { TRACE_2( -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] clm: Provide the node address as a parameter to the scale-out script [#2538]
Ack. Thanks Praveen On 01-Aug-17 4:42 PM, Anders Widell wrote: Provide the node address as a command-line parameter when calling the scale-out script. This can be useful if the scale-out script needs to contact the node (e.g. copy some files to it or update some configuration on the node's local disk) as part of the scale-out operation. --- src/clm/clmd/clms_evt.c | 57 ++--- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/src/clm/clmd/clms_evt.c b/src/clm/clmd/clms_evt.c index ace140db4..84e7b3c6d 100644 --- a/src/clm/clmd/clms_evt.c +++ b/src/clm/clmd/clms_evt.c @@ -488,9 +488,17 @@ static void scale_out_node(CLMS_CB *cb, queue_the_node = false; } if (queue_the_node) { + char node_address[SA_CLM_MAX_ADDRESS_LENGTH + 1]; + size_t addr_len = nodeup_info->address.length; + if (addr_len > SA_CLM_MAX_ADDRESS_LENGTH) + addr_len = SA_CLM_MAX_ADDRESS_LENGTH; + if (nodeup_info->no_of_addresses == 0) + addr_len = 0; + memcpy(node_address, nodeup_info->address.value, addr_len); + node_address[addr_len] = '\0'; char *strp; - if (asprintf(&strp, "%" PRIu32 ",%s,", nodeup_info->node_id, -node_name) != -1) { + if (asprintf(&strp, "%" PRIu32 ",%s,%s,", nodeup_info->node_id, +node_name, node_address) != -1) { LOG_NO("Queuing request to scale out node 0x%" PRIx32 " (%s)", nodeup_info->node_id, node_name); @@ -525,13 +533,10 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { clmsv_clms_node_up_info_t *nodeup_info = &(evt->info.msg.info.api_info.param).nodeup_info; - CLMS_CLUSTER_NODE *node = NULL; - SaUint32T nodeid; uint32_t rc = NCSCC_RC_SUCCESS; SaNameT node_name = {0}; CLMSV_MSG clm_msg; SaBoolT check_member; - IPLIST *ip = NULL; TRACE_ENTER2("Node up mesg for nodename length %d %s", nodeup_info->node_name.length, @@ -542,10 +547,21 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) (char *)node_name.value, sizeof(node_name.value), "safNode=%s,%s", nodeup_info->node_name.value, osaf_cluster->name.value); - nodeid = 
evt->info.msg.info.api_info.param.nodeup_info.node_id; + SaUint32T nodeid = nodeup_info->node_id; + + /* Retrieve IP information */ + IPLIST *ip = (IPLIST *)ncs_patricia_tree_get(&clms_cb->iplist, +(uint8_t *)&nodeid); + + if (ip != NULL && ip->addr.length != 0 && + nodeup_info->no_of_addresses == 0) { + nodeup_info->no_of_addresses = 1; + memcpy(&(nodeup_info->address), &(ip->addr), sizeof(ip->addr)); + } - node = clms_node_get_by_name(&node_name); + CLMS_CLUSTER_NODE *node = clms_node_get_by_name(&node_name); clm_msg.info.api_resp_info.rc = SA_AIS_OK; + if (node == NULL) { /* The /etc/opensaf/node_name is an user exposed configuration * file. The node_name file contains the RDN value of the CLM @@ -573,8 +589,7 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) if (node != NULL) { /* Retrieve IP information */ - if ((ip = (IPLIST *)ncs_patricia_tree_get( -&clms_cb->iplist, (uint8_t *)&nodeid)) == NULL) { + if (ip == NULL) { clm_msg.info.api_resp_info.rc = SA_AIS_ERR_NOT_EXIST; LOG_ER( "IP information not found for: %s with node_id: %u", @@ -653,8 +668,7 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) /* Self Node needs to be added tp patricia tree before hand during init */ if (NULL == clms_node_get_by_id(nodeid)) { - node->node_id = - evt->info.msg.info.api_info.param.nodeup_info.node_id; + node->node_id = nodeup_info->node_id; TRACE("node->node_id %u node->nodeup %d", node->node_id, node->nodeup); @@ -665,29 +679,18 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) "/node_name configuration"); } } - node->boot_time = - evt->info.msg.info.api_info.param.nodeup_info.boot_time; + + node->boot_time = nodeup_info->boot_time; /* Update the node with ipaddress information */ - if (ip->addr.length) { - memset(&node->node_addr, 0, sizeof(SaClmNodeAddressT)); - node->node_addr.family = ip->addr.family; - node->node_addr.length = ip->addr.length; - memc
Re: [devel] [PATCH 1/1] clm: Include boot time and node address in join request message [#2489]
Ack. Thanks, Praveen On 07-Aug-17 1:35 PM, Anders Widell wrote: A node can have more than one single network address. If you run the ifconfig command, you get a list of network interfaces. Each one of these interfaces can have several address assigned to it: IPv4 addresses, IPv6 addresses, and alias addresses. In addition, the node can have a TIPC address. So in the case of ticket [#2479], we might need to make both saClmNodeAddress and saClmNodeCurrAddress multi-value. However, I don't intend to implement [#2479] in the near future (or at all), since ticket [#2489] is probably enough for most real-world use cases. In most real-world use cases, it is enough for the application to get one single address for each node, but we need the flexibility to select which one of the addresses to present to the application. saClmNodeAddressFamily and saClmNodeAddress are currently ignored by OpenSAF. I am not sure how saClmNodeAddressFamily and saClmNodeAddress are intended to be used, but my best guess is that saClmNodeAddress is intended for the case when you have statically assigned network addresses, and saClmNodeCurrAddress is intended for the case with dynamically assigned addresses, though there is no reason why we can't present a statically assigned address in saClmNodeCurrAddress as well. Since saClmNodeAddress is a configuration attribute, I am assuming here that you should actually be able to /set/ the node's address using the saClmNodeAddress configuration attribute! But in order for that to work, the node needs to read its IMM configuration immediately after booting, before it has configured its own network address. The only way this could work is if we are not actually talking about the network address used internally by OpenSAF, but the node's address on a separate network intended to be used by the application. Otherwise OpenSAF would not be able to communicate with IMM to read the node's own address. 
So according to this interpretation, each node has at least two addresses: one address used for internal OpenSAF communication, and another address used by the application. And it is the address used by the application which is configured using the saClmNodeAddress and presented in the saClmNodeCurrAddress attribute. Ticket doesn't favour any particular interpretation though, you are free to select the internal OpenSAF communication address or some other address to present in saClmNodeCurrAddress. regards, Anders Widell On 08/07/2017 07:12 AM, praveen malviya wrote: Hi Anders, I have started reviewing this patch. One initial query: We have two sets of attributes for address in "SaClmNode": set A) saClmNodeAddressFamily & saClmNodeAddress and set B )saClmNodeCurrAddressFamily & saClmNodeCurrAddress. For ticket #2479, its description says making set B as Multi valued. I think it is set A that should be made multi-valued and set B should reflect the address currently in use. This will resolve some backward compatibility issue also as set B remains single valued. Also the set B should reflect which address? address used by OpenSAF or by application? Till this time it has been OpenSAF internal communication address. Thanks, Praveen On 31-Jul-17 6:40 PM, Anders Widell wrote: The node join request message now has two new fields: boot time and node address. This allows us to provide more accurate and correct information in the CLM node runtime attributes in the information model: * The boot time field transmits the node's actual boot time to the CLM server. Previously, the node join time was used as an approximation of the node boot time, but this might be inaccurate or incorrect. For example, if OpenSAF was started much later than the node was booted (e.g. if OpenSAF was restarted without a node reboot), then the node join time will differ significantly from the node boot time. 
* The node address field transmits the node address to be presented to the application through the information model. Previously, the IP address which was used by OpenSAF internal communication was presented as the one and only node address, and there was no way to select some other address in case the node has multiple network addresses. The application now has the possibility to select which network address to present in the information model. --- 00-README.conf | 8 src/clm/clmd/clms.h| 1 - src/clm/clmd/clms_evt.c| 15 ++- src/clm/clmd/clms_main.c | 22 ++ src/clm/clmd/clms_mbcsv.c | 11 ++--- src/clm/clmd/clms_mbcsv.h | 2 - src/clm/clmd/clms_mds.c| 92 +- src/clm/clmd/clms_util.c | 15 --- src/clm/clmnd/cb.h | 14 --- src/clm/clmnd/clmna.conf | 13 ++ src/clm/clmnd/main.c | 89 +
Re: [devel] [PATCH 1/1] clm: Include boot time and node address in join request message [#2489]
Hi Anders, I have started reviewing this patch. One initial query: We have two sets of attributes for address in "SaClmNode": set A) saClmNodeAddressFamily & saClmNodeAddress and set B )saClmNodeCurrAddressFamily & saClmNodeCurrAddress. For ticket #2479, its description says making set B as Multi valued. I think it is set A that should be made multi-valued and set B should reflect the address currently in use. This will resolve some backward compatibility issue also as set B remains single valued. Also the set B should reflect which address? address used by OpenSAF or by application? Till this time it has been OpenSAF internal communication address. Thanks, Praveen On 31-Jul-17 6:40 PM, Anders Widell wrote: The node join request message now has two new fields: boot time and node address. This allows us to provide more accurate and correct information in the CLM node runtime attributes in the information model: * The boot time field transmits the node's actual boot time to the CLM server. Previously, the node join time was used as an approximation of the node boot time, but this might be inaccurate or incorrect. For example, if OpenSAF was started much later than the node was booted (e.g. if OpenSAF was restarted without a node reboot), then the node join time will differ significantly from the node boot time. * The node address field transmits the node address to be presented to the application through the information model. Previously, the IP address which was used by OpenSAF internal communication was presented as the one and only node address, and there was no way to select some other address in case the node has multiple network addresses. The application now has the possibility to select which network address to present in the information model. 
--- 00-README.conf | 8 src/clm/clmd/clms.h| 1 - src/clm/clmd/clms_evt.c| 15 ++- src/clm/clmd/clms_main.c | 22 ++ src/clm/clmd/clms_mbcsv.c | 11 ++--- src/clm/clmd/clms_mbcsv.h | 2 - src/clm/clmd/clms_mds.c| 92 +- src/clm/clmd/clms_util.c | 15 --- src/clm/clmnd/cb.h | 14 --- src/clm/clmnd/clmna.conf | 13 ++ src/clm/clmnd/main.c | 89 src/clm/common/clmsv_enc_dec.c | 34 +++- src/clm/common/clmsv_enc_dec.h | 2 + src/clm/common/clmsv_msg.h | 5 +++ 14 files changed, 232 insertions(+), 91 deletions(-) diff --git a/00-README.conf b/00-README.conf index 380cdc2f4..b9ef1 100644 --- a/00-README.conf +++ b/00-README.conf @@ -65,6 +65,14 @@ controller nodes, the default delay of 200 ms should be sufficient. For systems with a very large number of configured system controller nodes and/or with unreliable network connections, values larger than 5000 may be needed. +CLMNA_ADDR_FAMILY and CLMNA_ADDR_VALUE let you specify the values shown in the +saClmNodeCurrAddressFamily and saClmNodeCurrAddress runtime attributes of the +node's SaClmNode IMM object. If these options are not set, CLM will try +to figure out the node's address by itself. Since a node can have more than one +network address, the address chosen by CLM may not be the address your +application is interested in. By explicitly specifying the address using +CLMNA_ADDR_FAMILY and CLMNA_ADDR_VALUE, you will be guaranteed that the correct +address is presented. 
*** dtmd.conf diff --git a/src/clm/clmd/clms.h b/src/clm/clmd/clms.h index 664c0da29..2ac69eade 100644 --- a/src/clm/clmd/clms.h +++ b/src/clm/clmd/clms.h @@ -116,7 +116,6 @@ extern SaAisErrorT clms_plm_init(CLMS_CB *cb); #endif extern void clms_node_add_to_model(CLMS_CLUSTER_NODE *node); extern SaTimeT clms_get_SaTime(void); -extern SaTimeT clms_get_BootTime(void); extern void clms_imm_impl_set(CLMS_CB *cb); extern uint32_t clms_rda_init(CLMS_CB *cb); extern void clms_adminop_pending(void); diff --git a/src/clm/clmd/clms_evt.c b/src/clm/clmd/clms_evt.c index d29925b77..ace140db4 100644 --- a/src/clm/clmd/clms_evt.c +++ b/src/clm/clmd/clms_evt.c @@ -1,6 +1,7 @@ /* -*- OpenSAF -*- * * (C) Copyright 2010,2015 The OpenSAF Foundation + * Copyright Ericsson AB 2017 - All Rights Reserved. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY @@ -664,11 +665,12 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) "/node_name configuration"); } } - node->boot_time = clms_get_SaTime(); + node->boot_time = + evt->info.msg.info.api_info.param.nodeup_info.boot_time; /* Update the node with ipaddress information */ if (ip->addr.length) { -
Re: [devel] [PATCH 0/1] Review Request for clm: Include boot time and node address in join request message [#2489]
Hi Anders, I will review both #2489 and #2538 by Monday. Thanks, Praveen On 04-Aug-17 12:24 PM, Anders Widell wrote: Hi! Did you get a chance to look at this yet? thanks, Anders Widell On 07/31/2017 03:10 PM, Anders Widell wrote: Summary: clm: Include boot time and node address in join request message [#2489] Review request for Ticket(s): 2489 Peer Reviewer(s): Praveen Pull request to: Affected branch(es): develop Development branch: ticket-2489 Base revision: 10b4c9e2f952456c3ef7c4413e224c3365e4b18f Personal repository: git://git.code.sf.net/u/anders-w/review Impacted area Impact y/n Docsn Build systemn RPM/packaging n Configuration files n Startup scripts n SAF servicesy OpenSAF servicesn Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): - NOTE: This ticket depends on ticket [#2535] which is still out on review. revision 56e7a62729974e03f3cd441a182121ea67937136 Author:Anders Widell Date:Mon, 31 Jul 2017 14:02:07 +0200 clm: Include boot time and node address in join request message [#2489] The node join request message now has two new fields: boot time and node address. This allows us to provide more accurate and correct information in the CLM node runtime attributes in the information model: * The boot time field transmits the node's actual boot time to the CLM server. Previously, the node join time was used as an approximation of the node boot time, but this might be inaccurate or incorrect. For example, if OpenSAF was started much later than the node was booted (e.g. if OpenSAF was restarted without a node reboot), then the node join time will differ significantly from the node boot time. * The node address field transmits the node address to be presented to the application through the information model. Previously, the IP address which was used by OpenSAF internal communication was presented as the one and only node address, and there was no way to select some other address in case the node has multiple network addresses. 
The application now has the possibility to select which network address to present in the information model. Complete diffstat: -- 00-README.conf | 8 src/clm/clmd/clms.h| 1 - src/clm/clmd/clms_evt.c| 15 ++- src/clm/clmd/clms_main.c | 22 ++ src/clm/clmd/clms_mbcsv.c | 11 ++--- src/clm/clmd/clms_mbcsv.h | 2 - src/clm/clmd/clms_mds.c| 92 +- src/clm/clmd/clms_util.c | 15 --- src/clm/clmnd/cb.h | 14 --- src/clm/clmnd/clmna.conf | 13 ++ src/clm/clmnd/main.c | 89 src/clm/common/clmsv_enc_dec.c | 34 +++- src/clm/common/clmsv_enc_dec.h | 2 + src/clm/common/clmsv_msg.h | 5 +++ 14 files changed, 232 insertions(+), 91 deletions(-) Testing Commands: - For boot time: Make a note of saClmNodeBootTimeStamp for a node in the cluster. Run /etc/init.d/opensafd stop followed by /etc/init.d/opensafd start on that node. Check saClmNodeBootTimeStamp of that node again. The time stamp shall not be affected by restarting OpenSAF (without a node reboot). For node address: Configure OpenSAF to use TIPC for internal communication. Set CLMNA_ADDR_FAMILY and CLMNA_ADDR_VALUE in /etc/opensaf/clmna.conf for a node in the cluster. Start the node. Check saClmNodeCurrAddressFamily and saClmNodeCurrAddress of that node. The values shall match what you entered in /etc/opensaf/clmna.conf. Testing, Expected Results: -- See above. Conditions of Submission: - Ack from reviewer(s) Arch Built StartedLinux distro --- mipsn n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. 
___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left interna
Re: [devel] [PATCH 1/1] amfd: Do not create duplicated HA state absent SUSI [#2530]
Ack, code review only. Thanks Praveen On 02-Aug-17 5:03 PM, minh chau wrote: Hi Praveen, This ticket as well as the *if* block we are talking about, it is for absent SUSI which is read from IMM as a helper to failover after SG absence stage. The current SG 2N code can not failover an absent SUSI to another present SUSI that both have the same HA state, so we exclude this case as in #2477, #2530. The case you mention that has SU1, SU2, both of SUs have present SUSI, and it won't run in the code of creating absent SUSI. It should be also working as long as no loss of RTA, since after SC absence stage the SG operation resumes to what it was before loss of SCs. Thanks, Minh On 02/08/17 21:18, praveen malviya wrote: Hi Minh, I wanted to highlight a valid case when quiesced HA state can be there in two SUs in 2N model. In switchover situation when one SU1 has successfully quiesced, amfd sends active assignment to standby SU2. While standby SU2 is becoming acitve it faults with comp-failover recovery and AMFD sends it a quiesced HA state. Thus there can be two quiesced valid SUSI in a SG at momentarily. Thanks, Praveen On 02-Aug-17 4:41 PM, minh chau wrote: Hi Praveen, Please find my reply inline. Thanks, Minh On 02/08/17 20:17, praveen malviya wrote: Hi Minh, Please find one query inline with [Praveen]. 
Thanks, Praveen On 28-Jul-17 7:44 AM, Minh Chau wrote: Symtomp is similar to #2477, this patch fixes for case of 2 STANDBY assignment for same SI --- src/amf/amfd/si.cc| 14 ++ src/amf/amfd/si.h | 1 + src/amf/amfd/siass.cc | 6 ++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc index 3f76c1476..27245339c 100644 --- a/src/amf/amfd/si.cc +++ b/src/amf/amfd/si.cc @@ -1594,6 +1594,20 @@ const AVD_SIRANKEDSU *AVD_SI::get_si_ranked_su( return sirankedsu; } +/* + * @brief Count number of SUSI assignment that are assigned to this SI + *with specified HA state + * @param [in] @ha: HA state + * @return: number of SUSI assignment + */ +uint32_t AVD_SI::count_sisu_with(SaAmfHAStateT ha) { + uint32_t count = 0; + for (AVD_SU_SI_REL *sisu = list_of_sisu; sisu != nullptr; + sisu = sisu->si_next) { +if (sisu->state == ha) count++; + } + return count; +} /* * @brief Update alarm_sent by new value of @alarm_state, diff --git a/src/amf/amfd/si.h b/src/amf/amfd/si.h index 4f8dc5718..af14363b6 100644 --- a/src/amf/amfd/si.h +++ b/src/amf/amfd/si.h @@ -152,6 +152,7 @@ class AVD_SI { const AVD_SIRANKEDSU *get_si_ranked_su(const std::string &su_name) const; bool is_active() const; SaAisErrorT si_swap_validate(); + uint32_t count_sisu_with(SaAmfHAStateT ha); private: bool is_assigned() const { return list_of_sisu ? 
true : false; } diff --git a/src/amf/amfd/siass.cc b/src/amf/amfd/siass.cc index d14d279dc..267c55c07 100644 --- a/src/amf/amfd/siass.cc +++ b/src/amf/amfd/siass.cc @@ -351,11 +351,9 @@ bool avd_susi_validate_absent_assignment(AVD_SU *su, AVD_SI *si, goto done; } // No need to create absent SUSI assignment for the 2N SI that already has - // ACTIVE SUSI + // the same @imm_ha_state SUSI if (su->sg_of_su->sg_redundancy_model == SA_AMF_2N_REDUNDANCY_MODEL) { -if (si->list_of_sisu != nullptr && -si->list_of_sisu->state == SA_AMF_HA_ACTIVE && -imm_ha_state == SA_AMF_HA_ACTIVE) +if (si->count_sisu_with(imm_ha_state) > 0) [Praveen] Ticket is raised for 2 standby case and earliar fix was for 2 acitve case. This if block now does not check HA state. What will happen in the case of quiesced state? We can have two quiesced state in case of faults when a SU faults when it is becoming active in switchover situation and amf sends quiesced state to this faulted active SU. [Minh]: This *if* block now is for all HA states. It means that we don't create an absent SUSI when amfd already had another SUSI with same HA state in 2N SG. I should have made it this way earlier in ticket #2477 so we would not have this ticket #2530. goto done; } -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfd: Do not create duplicated HA state absent SUSI [#2530]
Hi Minh, I wanted to highlight a valid case when quiesced HA state can be there in two SUs in 2N model. In switchover situation when one SU1 has successfully quiesced, amfd sends active assignment to standby SU2. While standby SU2 is becoming acitve it faults with comp-failover recovery and AMFD sends it a quiesced HA state. Thus there can be two quiesced valid SUSI in a SG at momentarily. Thanks, Praveen On 02-Aug-17 4:41 PM, minh chau wrote: Hi Praveen, Please find my reply inline. Thanks, Minh On 02/08/17 20:17, praveen malviya wrote: Hi Minh, Please find one query inline with [Praveen]. Thanks, Praveen On 28-Jul-17 7:44 AM, Minh Chau wrote: Symtomp is similar to #2477, this patch fixes for case of 2 STANDBY assignment for same SI --- src/amf/amfd/si.cc| 14 ++ src/amf/amfd/si.h | 1 + src/amf/amfd/siass.cc | 6 ++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc index 3f76c1476..27245339c 100644 --- a/src/amf/amfd/si.cc +++ b/src/amf/amfd/si.cc @@ -1594,6 +1594,20 @@ const AVD_SIRANKEDSU *AVD_SI::get_si_ranked_su( return sirankedsu; } +/* + * @brief Count number of SUSI assignment that are assigned to this SI + *with specified HA state + * @param [in] @ha: HA state + * @return: number of SUSI assignment + */ +uint32_t AVD_SI::count_sisu_with(SaAmfHAStateT ha) { + uint32_t count = 0; + for (AVD_SU_SI_REL *sisu = list_of_sisu; sisu != nullptr; + sisu = sisu->si_next) { +if (sisu->state == ha) count++; + } + return count; +} /* * @brief Update alarm_sent by new value of @alarm_state, diff --git a/src/amf/amfd/si.h b/src/amf/amfd/si.h index 4f8dc5718..af14363b6 100644 --- a/src/amf/amfd/si.h +++ b/src/amf/amfd/si.h @@ -152,6 +152,7 @@ class AVD_SI { const AVD_SIRANKEDSU *get_si_ranked_su(const std::string &su_name) const; bool is_active() const; SaAisErrorT si_swap_validate(); + uint32_t count_sisu_with(SaAmfHAStateT ha); private: bool is_assigned() const { return list_of_sisu ? 
true : false; } diff --git a/src/amf/amfd/siass.cc b/src/amf/amfd/siass.cc index d14d279dc..267c55c07 100644 --- a/src/amf/amfd/siass.cc +++ b/src/amf/amfd/siass.cc @@ -351,11 +351,9 @@ bool avd_susi_validate_absent_assignment(AVD_SU *su, AVD_SI *si, goto done; } // No need to create absent SUSI assignment for the 2N SI that already has - // ACTIVE SUSI + // the same @imm_ha_state SUSI if (su->sg_of_su->sg_redundancy_model == SA_AMF_2N_REDUNDANCY_MODEL) { -if (si->list_of_sisu != nullptr && -si->list_of_sisu->state == SA_AMF_HA_ACTIVE && -imm_ha_state == SA_AMF_HA_ACTIVE) +if (si->count_sisu_with(imm_ha_state) > 0) [Praveen] Ticket is raised for 2 standby case and earliar fix was for 2 acitve case. This if block now does not check HA state. What will happen in the case of quiesced state? We can have two quiesced state in case of faults when a SU faults when it is becoming active in switchover situation and amf sends quiesced state to this faulted active SU. [Minh]: This *if* block now is for all HA states. It means that we don't create an absent SUSI when amfd already had another SUSI with same HA state in 2N SG. I should have made it this way earlier in ticket #2477 so we would not have this ticket #2530. goto done; } -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfd: Do not create duplicated HA state absent SUSI [#2530]
Hi Minh, Please find one query inline with [Praveen]. Thanks, Praveen On 28-Jul-17 7:44 AM, Minh Chau wrote: Symtomp is similar to #2477, this patch fixes for case of 2 STANDBY assignment for same SI --- src/amf/amfd/si.cc| 14 ++ src/amf/amfd/si.h | 1 + src/amf/amfd/siass.cc | 6 ++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc index 3f76c1476..27245339c 100644 --- a/src/amf/amfd/si.cc +++ b/src/amf/amfd/si.cc @@ -1594,6 +1594,20 @@ const AVD_SIRANKEDSU *AVD_SI::get_si_ranked_su( return sirankedsu; } +/* + * @brief Count number of SUSI assignment that are assigned to this SI + *with specified HA state + * @param [in] @ha: HA state + * @return: number of SUSI assignment + */ +uint32_t AVD_SI::count_sisu_with(SaAmfHAStateT ha) { + uint32_t count = 0; + for (AVD_SU_SI_REL *sisu = list_of_sisu; sisu != nullptr; + sisu = sisu->si_next) { +if (sisu->state == ha) count++; + } + return count; +} /* * @brief Update alarm_sent by new value of @alarm_state, diff --git a/src/amf/amfd/si.h b/src/amf/amfd/si.h index 4f8dc5718..af14363b6 100644 --- a/src/amf/amfd/si.h +++ b/src/amf/amfd/si.h @@ -152,6 +152,7 @@ class AVD_SI { const AVD_SIRANKEDSU *get_si_ranked_su(const std::string &su_name) const; bool is_active() const; SaAisErrorT si_swap_validate(); + uint32_t count_sisu_with(SaAmfHAStateT ha); private: bool is_assigned() const { return list_of_sisu ? 
true : false; } diff --git a/src/amf/amfd/siass.cc b/src/amf/amfd/siass.cc index d14d279dc..267c55c07 100644 --- a/src/amf/amfd/siass.cc +++ b/src/amf/amfd/siass.cc @@ -351,11 +351,9 @@ bool avd_susi_validate_absent_assignment(AVD_SU *su, AVD_SI *si, goto done; } // No need to create absent SUSI assignment for the 2N SI that already has - // ACTIVE SUSI + // the same @imm_ha_state SUSI if (su->sg_of_su->sg_redundancy_model == SA_AMF_2N_REDUNDANCY_MODEL) { -if (si->list_of_sisu != nullptr && -si->list_of_sisu->state == SA_AMF_HA_ACTIVE && -imm_ha_state == SA_AMF_HA_ACTIVE) +if (si->count_sisu_with(imm_ha_state) > 0) [Praveen] Ticket is raised for 2 standby case and earlier fix was for 2 active case. This if block now does not check HA state. What will happen in the case of quiesced state? We can have two quiesced states in case of faults when a SU faults when it is becoming active in switchover situation and amf sends quiesced state to this faulted active SU. goto done; } -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/4] amf: Log CLM initialization error only once on unconfigured nodes [#2509]
Ack for all the patches. Thanks, Praveen On 24-Jul-17 7:27 PM, Anders Widell wrote: Avoid spamming the syslog with more than one log message in case CLM returns SA_AIS_ERR_UNAVAILABLE (i.e. we are running on a currently unconfigured node). --- src/amf/amfd/clm.cc | 6 -- src/amf/amfnd/clm.cc | 6 -- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc index 86c23ea46..da951d223 100644 --- a/src/amf/amfd/clm.cc +++ b/src/amf/amfd/clm.cc @@ -433,13 +433,15 @@ SaAisErrorT avd_clm_init(AVD_CL_CB *cb) { * BAD_HANDLE. Also, duplicated codes in initialization thread * will be moved to osaf dedicated thread */ + bool has_logged_clm_error = false; for (;;) { SaVersionT Version = {'B', 4, 1}; error = saClmInitialize_4(&clm_handle, &clm_callbacks, &Version); if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT || error == SA_AIS_ERR_UNAVAILABLE) { - if (error != SA_AIS_ERR_TRY_AGAIN) { -LOG_WA("saClmInitialize_4 returned %u", (unsigned)error); + if (error != SA_AIS_ERR_TRY_AGAIN && !has_logged_clm_error) { +LOG_WA("saClmInitialize_4 returned %u", static_cast(error)); +has_logged_clm_error = true; } osaf_nanosleep(&kHundredMilliseconds); continue; diff --git a/src/amf/amfnd/clm.cc b/src/amf/amfnd/clm.cc index 6985f3685..f1f65bcef 100644 --- a/src/amf/amfnd/clm.cc +++ b/src/amf/amfnd/clm.cc @@ -276,13 +276,15 @@ SaAisErrorT avnd_clm_init(AVND_CB *cb) { cb->first_time_up = true; cb->clmHandle = 0; + bool has_logged_clm_error = false; for (;;) { SaVersionT Version = {'B', 4, 1}; error = saClmInitialize_4(&cb->clmHandle, &callbacks, &Version); if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT || error == SA_AIS_ERR_UNAVAILABLE) { - if (error != SA_AIS_ERR_TRY_AGAIN) { -LOG_WA("saClmInitialize_4 returned %u", (unsigned)error); + if (error != SA_AIS_ERR_TRY_AGAIN && !has_logged_clm_error) { +LOG_WA("saClmInitialize_4 returned %u", static_cast(error)); +has_logged_clm_error = true; } 
osaf_nanosleep(&kHundredMilliseconds); continue; -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 0/4] Review Request for clm: Make it possible for a node to scale out itself using autoscaling [#2509]
I am reviewing the patches. Thanks, Praveen On 24-Jul-17 7:27 PM, Anders Widell wrote: Summary: clm: Make it possible for a node to scale out itself using autoscaling [#2509] Review request for Ticket(s): 2509 Peer Reviewer(s): Praveen Pull request to: Affected branch(es): develop Development branch: ticket-2509 Base revision: de977bacba9b452fe2a8abcae26a1188a7a15f31 Personal repository: git://git.code.sf.net/u/anders-w/review Impacted area Impact y/n Docsn Build systemn RPM/packaging n Configuration files n Startup scripts n SAF servicesy OpenSAF servicesn Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): - revision 7782f5e526729b97a973e11feb52105c120fe2ce Author: Anders Widell Date: Mon, 24 Jul 2017 15:41:51 +0200 clm: Make it possible for a node to scale out itself using autoscaling [#2509] Ticket [#1453] added support for autoscaling, which allows scale-out from an initial cluster containing at least one node. This commit adds support for scaling out from a cluster containing zero nodes, or alternatively, a cluster where the active node is not a configured node. The use cases are as follows: * Support loading a backup that was created on a different cluster where none of the new nodes have the same name as any of the nodes in the old cluster. * Support cluster restart on a system where nodes don't have persistent local storage (or persistent host names / node names) - i.e. a system where a node reboot will always result in a scale-in followed by a scale-out * Make scaling more robust, e.g. imagine a case when a one-node cluster is scaled out by adding a second node, but then the original node is removed before scale-out of the new node has completed. 
revision 262d4c7a96e663dc00335278df28e0788c19d334 Author: Anders Widell Date: Mon, 24 Jul 2017 15:41:15 +0200 ntf: Re-try initializing CLM on unconfigured nodes [#2509] Re-try initializing the CLM API when it returns SA_AIS_ERR_UNAVAILABLE, so that the NTF service properly waits for the node to become configured by the autoscaling functionality. revision 2e2f4dd43621a1113262caf274bac4989f2d9d7d Author: Anders Widell Date: Mon, 24 Jul 2017 15:34:47 +0200 log: Re-try initializing CLM on unconfigured nodes [#2509] Re-try initializing the CLM API when it returns SA_AIS_ERR_UNAVAILABLE. This error code is returned if the LOG service has been started on an unconfigured node, which may happen for a while when the autoscaling feature is used. revision 31305c94edb9eae1aecc66c6d13105324ffcfa1b Author: Anders Widell Date: Mon, 24 Jul 2017 15:32:24 +0200 amf: Log CLM initialization error only once on unconfigured nodes [#2509] Avoid spamming the syslog with more than one log message in case CLM returns SA_AIS_ERR_UNAVAILABLE (i.e. we are running on a currently unconfigured node). Complete diffstat: -- src/amf/amfd/clm.cc | 6 -- src/amf/amfnd/clm.cc | 6 -- src/clm/clmd/clms_main.c | 12 ++-- src/log/logd/lgs_clm.cc | 3 ++- src/ntf/ntfd/ntfs_clm.c | 3 ++- 5 files changed, 22 insertions(+), 8 deletions(-) Testing Commands: - Enable autoscaling in clmd.conf and by removing the "exit" command from opensaf_scale_out script. Start a cluster where none of the nodes are configured in IMM. Testing, Expected Results: -- The nodes shall be scaled out. Conditions of Submission: - Ack from reviewer(s) Arch Built StartedLinux distro --- mipsn n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. 
___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These hav
Re: [devel] [PATCH 1/1] clm: add clm tool for tracking and for getting node info [#2429]
Hi Anders, Attached is the patch after incorporating the comments. I will be pushing it on Monday. Please go through it. Thanks Praveen On 18-Jul-17 9:20 PM, Anders Widell wrote: Ack with comments: * Indentation seems to be according to Google C++ style guide, although the file is written in C and should be indented according to the Linux Kernel coding style. Either change the file extension from .c to .cc or change the formatting. It can also be a good idea to run the style checkers: "make cpplint" for C++ code or "make checkpatch" for C code. * The program is installed in sbin, but shouldn't it be installed in bin? sbin is intended for system administration tools. * The name of the program is not consistent with already existing tools like clm-adm etc. Maybe rename it to clm-app? Though "app" doesn't say much about what the program is doing. Better choices could be clm-list, clm-show, or clm-print. * Since there is only one source file, it is probably a good idea to give it the same name as the executable, but with a .c or .cc extension and any hyphens replaced with underscores (e.g. clm_print.cc) * It is probably better to remove the -f and -i flags, and replace them with optional arguments for the -n, -a, -m flags. * Node id (-i flag) doesn't support hexadecimal numbers. Use e.g. strtoul() with base 0 instead of atoi(). * It is probably better to use comma (,) instead of vertical bar (|) to separate track flags, because vertical bar is treated specially by the shell and must thus always be quoted. * Why not use the value supplied with the -t parameter also when using -n, instead of the hard-coded TIME_OUT value (if -t was not specified or negative then you can use TIME_OUT)? * Shouldn't the program exit once it has received the asynchronous node get callback? * osaf_extended_name_borrow() and osaf_extended_name_length() are mainly intended to be used in agent libraries. Please use saAisNameBorrow() and strlen(saAisNameBorrow()) instead. 
* Inconsistent use of EXIT_FAILURE/EXIT_SUCCESS and 1/0 for exit() and return from main (sometimes name is used, sometimes number). regards, Anders Widell On 07/14/2017 11:02 AM, Praveen wrote: Add a utility/application which enables user to: -perform tracking using saClmClusterTrack_4(). -get node info by calling saClmClusterNodeGet_4(). -get node info asynchronously by calling saClmClusterNodeGetAsync(). --- opensaf.spec.in | 1 + src/clm/Makefile.am | 13 ++ src/clm/tools/clm_api_app.c | 349 3 files changed, 363 insertions(+) create mode 100644 src/clm/tools/clm_api_app.c diff --git a/opensaf.spec.in b/opensaf.spec.in index 56e8d78..8c4b2c1 100644 --- a/opensaf.spec.in +++ b/opensaf.spec.in @@ -995,6 +995,7 @@ fi %defattr(-,root,root) %{_sbindir}/amfpm %{_sbindir}/amfclusterstatus +%{_sbindir}/clmapp %if %is_ais_ckpt diff --git a/src/clm/Makefile.am b/src/clm/Makefile.am index be3a668..417dc63 100644 --- a/src/clm/Makefile.am +++ b/src/clm/Makefile.am @@ -85,6 +85,7 @@ noinst_HEADERS += \ src/clm/common/clmsv_enc_dec.h \ src/clm/common/clmsv_msg.h +sbin_PROGRAMS += bin/clmapp osaf_execbin_PROGRAMS += bin/osafclmd bin/osafclmna nodist_pkgclccli_SCRIPTS += \ @@ -165,6 +166,18 @@ dist_bin_SCRIPTS += \ src/clm/tools/clm-find \ src/clm/tools/clm-state +bin_clmapp_CPPFLAGS = \ +-DSA_EXTENDED_NAME_SOURCE \ +$(AM_CPPFLAGS) + +bin_clmapp_SOURCES = \ +src/clm/tools/clm_api_app.c + +bin_clmapp_LDADD = \ +lib/libSaClm.la \ +lib/libopensaf_core.la + + if ENABLE_TESTS bin_PROGRAMS += bin/clmtest diff --git a/src/clm/tools/clm_api_app.c b/src/clm/tools/clm_api_app.c new file mode 100644 index 000..cae27f2 --- /dev/null +++ b/src/clm/tools/clm_api_app.c @@ -0,0 +1,349 @@ +/* -*- OpenSAF -*- + * + * Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. 
This file and program are licensed + * under the GNU Lesser General Public License Version 2.1, February 1999. + * The complete license can be accessed from the following location: + * https://urldefense.proofpoint.com/v2/url?u=http-3A__opensource.org_licenses_lgpl-2Dlicense.php&d=DwICaQ&c=RoP1YumCXCgaWHvlZYR8PQcxBKCX5YTpkKY057SbK10&r=Lehk1PZKwfDQtYJXNyUKbPAqrw5O--SlPRAF9DIEps4&m=WlWfNt4__h4REFw1hAKezXL8ZHVNlOzgMtMpNjhhWes&s=l2RAg-511WMwwdADnRrm1xBQSVnrb_z3zcohqea1vEA&e= + * See the Copying file included with the OpenSAF distribution for full + * licensing terms. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define SIZE_NOTIFICATIONS 100 +#define TIME_OUT ((SaTimeT)15
Re: [devel] [PATCH 1/1] clm: increase poll timeout for saClmClusterTrack tests [#2531]
Ack. Thanks Praveen On 21-Jul-17 9:52 AM, Gary Lee wrote: --- src/clm/apitest/tet_saClmClusterTrack.c | 30 +++--- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/clm/apitest/tet_saClmClusterTrack.c b/src/clm/apitest/tet_saClmClusterTrack.c index 82d792729..8a8ca89db 100644 --- a/src/clm/apitest/tet_saClmClusterTrack.c +++ b/src/clm/apitest/tet_saClmClusterTrack.c @@ -528,7 +528,7 @@ void saClmClusterTrack_14(void) fds[0].fd = (int)selectionObject; fds[0].events = POLLIN; - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); safassert(saClmDispatch(clmHandle, SA_DISPATCH_ALL), SA_AIS_OK); @@ -631,7 +631,7 @@ void saClmClusterTrack_21(void) while (1) { printf("waiting on poll"); - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); if (fds[0].revents & POLLIN) break; @@ -671,7 +671,7 @@ void saClmClusterTrack_22(void) } while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); if (fds[0].revents & POLLIN) break; @@ -705,7 +705,7 @@ void saClmClusterTrack_23(void) } while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); if (fds[0].revents & POLLIN) break; @@ -741,7 +741,7 @@ void saClmClusterTrack_24(void) printf("thread creation failed"); } while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); if (fds[0].revents & POLLIN) { break; @@ -755,7 +755,7 @@ void saClmClusterTrack_24(void) fds[0].fd = (int)selectionObject; fds[0].events = POLLIN; while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); if (fds[0].revents & POLLIN) { break; @@ -794,7 +794,7 @@ void saClmClusterTrack_25(void) } while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); if (fds[0].revents & POLLIN) { break; @@ -808,7 +808,7 @@ void saClmClusterTrack_25(void) fds[0].fd = (int)selectionObject; fds[0].events = POLLIN; while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret 
== 1); if (fds[0].revents & POLLIN) { break; @@ -847,7 +847,7 @@ void saClmClusterTrack_27(void) } while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); if (fds[0].revents & POLLIN) { break; @@ -861,7 +861,7 @@ void saClmClusterTrack_27(void) fds[0].fd = (int)selectionObject; fds[0].events = POLLIN; while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); if (fds[0].revents & POLLIN) { break; @@ -899,7 +899,7 @@ void saClmClusterTrack_28(void) } while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); if (fds[0].revents & POLLIN) { break; @@ -913,7 +913,7 @@ void saClmClusterTrack_28(void) fds[0].fd = (int)selectionObject; fds[0].events = POLLIN; while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); assert(ret == 1); if (fds[0].revents & POLLIN) { break; @@ -956,7 +956,7 @@ void saClmClusterTrack_31(void) printf("thread creation failed"); } while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); if (ret == 1) { printf("test timed out\n"); fflush(stdout); @@ -974,7 +974,7 @@ void saClmClusterTrack_31(void) fds[0].fd = (int)selectionObject; fds[0].events = POLLIN; while (1) { - ret = poll(fds, 1, 1000); + ret = poll(fds, 1, 3000); if (ret == 1) { printf("test timed out\n"); fflush(stdout); @@ -993,7 +993,7 @@ void saClmClusterTrack_31(void) fds[0].fd = (int)selectionObject; fds[0].events = POLLIN; while (1) { - ret = poll(fds, 1, 1000); + ret =
Re: [devel] [PATCH 1/1] clm: handle ERR_BAD_HANDLE for saImmOmSearchInitialize [#2528]
Ack, code review only. Thanks, Praveen On 14-Jul-17 6:42 PM, Zoran Milinkovic wrote: CLM handles ERR_BAD_HANDLE for saImmOmSearchInitialize in clms_cluster_config_get. As part of this patch, handling of IMM version is improved in the same function. --- src/clm/clmd/clms_imm.c | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/clm/clmd/clms_imm.c b/src/clm/clmd/clms_imm.c index 06a7df8..51429ec 100644 --- a/src/clm/clmd/clms_imm.c +++ b/src/clm/clmd/clms_imm.c @@ -432,10 +432,12 @@ SaAisErrorT clms_cluster_config_get(void) SaNameT dn; SaImmAttrValuesT_2 **attributes; const char *className = "SaClmCluster"; + SaVersionT version; TRACE_ENTER(); - (void)immutil_saImmOmInitialize(&imm_om_hdl, NULL, &immVersion); + version = immVersion; + (void)immutil_saImmOmInitialize(&imm_om_hdl, NULL, &version); searchParam.searchOneAttr.attrName = "SaImmAttrClassName"; searchParam.searchOneAttr.attrValueType = SA_IMM_ATTR_SASTRINGT; @@ -446,6 +448,20 @@ SaAisErrorT clms_cluster_config_get(void) SA_IMM_SEARCH_ONE_ATTR | SA_IMM_SEARCH_GET_ALL_ATTR, &searchParam, NULL, &search_hdl); + if (rc == SA_AIS_ERR_BAD_HANDLE) { + // Repeat one more search on ERR_BAD_HANDLE + + // Close the open OM handle, and initialize a new one + (void)immutil_saImmOmFinalize(imm_om_hdl); + version = immVersion; + (void)immutil_saImmOmInitialize(&imm_om_hdl, NULL, &version); + + rc = immutil_saImmOmSearchInitialize_2( + imm_om_hdl, &osaf_cluster->name, SA_IMM_SUBTREE, + SA_IMM_SEARCH_ONE_ATTR | SA_IMM_SEARCH_GET_ALL_ATTR, + &searchParam, NULL, &search_hdl); + } + if (rc != SA_AIS_OK) { LOG_ER("No Object of SaClmCluster Class was found"); goto done1; -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] clm: make CLM tests independent of other CLM tests [#2520]
Ack. Thanks Praveen On 05-Jul-17 8:23 PM, Zoran Milinkovic wrote: The patch removes dependencies between CLM tests. CLM tests can be run more times now. Duplicated CLM tests are removed from clmtest. --- src/clm/apitest/clmtest.c | 15 src/clm/apitest/tet_saClmClusterTrack.c | 130 +++- 2 files changed, 94 insertions(+), 51 deletions(-) diff --git a/src/clm/apitest/clmtest.c b/src/clm/apitest/clmtest.c index 3e8d95e..683bfe4 100644 --- a/src/clm/apitest/clmtest.c +++ b/src/clm/apitest/clmtest.c @@ -38,6 +38,13 @@ SaNameT node_name; void clm_init(void) { FILE *fp; + // Command list to execute before tests start + char *command[] = { + // Unlock PL-3 + "immadm -o 1 safNode=PL-3,safCluster=myClmCluster 2> /dev/null" + }; + int command_list_size = 1; + int i; fp = fopen("/etc/opensaf/node_name", "r"); if (fp == NULL) { @@ -49,6 +56,14 @@ void clm_init(void) if (cnt == 1) node_name.length = strlen((char *)node_name.value); fclose(fp); + + // Execute commands + for(i=0; i const SaVersionT refVersion = CLM_HIGHEST_SUPPORTED_VERSION; diff --git a/src/clm/apitest/tet_saClmClusterTrack.c b/src/clm/apitest/tet_saClmClusterTrack.c index 00a52b4..82d7927 100644 --- a/src/clm/apitest/tet_saClmClusterTrack.c +++ b/src/clm/apitest/tet_saClmClusterTrack.c @@ -25,39 +25,58 @@ SaUint8T trackFlags; SaClmNodeIdT nodeId; SaInvocationT invocation; SaInvocationT lock_inv; +static const char *s_node_name = "safNode=PL-3,safCluster=myClmCluster"; -static void *admin_lock(void *dummy) -{ - int rc; +static int clm_node_lock(const char *nodeName, int ignoreOutput) { char command[256]; - char name[] = "safNode=PL-3,safCluster=myClmCluster"; - sprintf(command, "immadm -o 2 %s", name); - assert((rc = system(command)) != -1); + if (ignoreOutput) { + sprintf(command, "immadm -o 2 %s 2> /dev/null", nodeName); + } else { + sprintf(command, "immadm -o 2 %s", nodeName); + } + return system(command); +} + +static int clm_node_unlock(const char *nodeName, int ignoreOutput) { + char command[256]; + + if 
(ignoreOutput) { + sprintf(command, "immadm -o 1 %s 2> /dev/null", nodeName); + } else { + sprintf(command, "immadm -o 1 %s", nodeName); + } + return system(command); +} + +static int clm_node_shutdown(const char *nodeName, int ignoreOutput) { + char command[256]; + + if (ignoreOutput) { + sprintf(command, "immadm -o 3 %s 2> /dev/null", nodeName); + } else { + sprintf(command, "immadm -o 3 %s", nodeName); + } + return system(command); +} + +static void *admin_lock(void *dummy) +{ + assert(clm_node_lock(s_node_name, 0) != -1); /*test_validate(WEXITSTATUS(rc), 0);*/ return NULL; } static void *admin_unlock(void *dummy) { - int rc; - char command[256]; - char name[] = "safNode=PL-3,safCluster=myClmCluster"; - - sprintf(command, "immadm -o 1 %s", name); - assert((rc = system(command)) != -1); + assert(clm_node_unlock(s_node_name, 0) != -1); /*test_validate(WEXITSTATUS(rc), 0);*/ return NULL; } static void *admin_shutdown(void *dummy) { - int rc; - char command[256]; - char name[] = "safNode=PL-3,safCluster=myClmCluster"; - - sprintf(command, "immadm -o 3 %s", name); - assert((rc = system(command)) != -1); + assert(clm_node_shutdown(s_node_name, 0) != -1); /*test_validate(WEXITSTATUS(rc), 0);*/ return NULL; } @@ -68,9 +87,15 @@ static void saClmadmin_lock1(void) char command[256]; char name[] = "safNode=PL-3,safCluster=myClmCluster"; + // Lock node + clm_node_lock(name, 1); + sprintf(command, "immadm -o 2 %s", name); assert((rc = system(command)) != -1); test_validate(WEXITSTATUS(rc), 1); + + // Reset CLM state + clm_node_unlock(name, 1); } static void saClmadmin_unlock1(void) @@ -90,9 +115,15 @@ static void saClmadmin_shutdown1(void) char command[256]; char name[] = "safNode=PL-3,safCluster=myClmCluster"; + // Shutdown node + clm_node_shutdown(name, 1); + sprintf(command, "immadm -o 3 %s", name); assert((rc = system(command)) != -1); test_validate(WEXITSTATUS(rc), 1); + + // Reset CLM state + clm_node_unlock(name, 1); } static void *plm_admin_trylock(void *dummy) @@ 
-610,6 +641,9 @@ void saClmClusterTrack_21(void) safassert(saClmClusterTrackStop(clmHandle), SA_AIS_OK); safassert(saClmFinalize(clmHandle), SA_AIS_OK); test_validate(rc, SA_AIS_OK); + + // Reset CLM state + clm_node_unlock(s_node_name, 1); } void saClmClusterTrack_22(voi
Re: [devel] [PATCH 1/1] amfd: increase msg priority for node ups [#2510]
10514:10514:src/amf/amfd/main.cc:0818] << process_event Jun 20 17:20:45.580850 osafamfd [10514:10514:src/amf/amfd/main.cc:0770] >> process_event: evt->rcv_evt 8 Jun 20 17:20:45.581504 osafamfd [10514:10514:src/amf/amfd/main.cc:0818] << process_event Jun 20 17:20:45.582168 osafamfd [10514:10514:src/amf/amfd/main.cc:0770] >> process_event: evt->rcv_evt 4 Jun 20 17:20:45.584366 osafamfd [10514:10514:src/amf/amfd/main.cc:0818] << process_event Jun 20 17:20:45.585550 osafamfd [10514:10514:src/amf/amfd/main.cc:0770] >> process_event: evt->rcv_evt 8 Jun 20 17:20:45.585754 osafamfd [10514:10514:src/amf/amfd/main.cc:0818] << process_event Jun 20 17:20:45.586324 osafamfd [10514:10514:src/amf/amfd/main.cc:0770] >> process_event: evt->rcv_evt 8 Jun 20 17:20:45.586531 osafamfd [10514:10514:src/amf/amfd/main.cc:0818] << process_event Jun 20 17:20:45.587070 osafamfd [10514:10514:src/amf/amfd/main.cc:0770] >> process_event: evt->rcv_evt 8 Jun 20 17:20:45.587742 osafamfd [10514:10514:src/amf/amfd/main.cc:0818] << process_event Jun 20 17:20:46.048978 osafamfd [10514:10514:src/amf/amfd/main.cc:0770] >> process_event: evt->rcv_evt 4 Jun 20 17:20:46.052609 osafamfd [10514:10514:src/amf/amfd/main.cc:0818] << process_event Jun 20 17:20:46.054629 osafamfd [10514:10514:src/amf/amfd/main.cc:0770] >> process_event: evt->rcv_evt 1 Jun 20 17:20:46.054834 osafamfd [10514:10514:src/amf/amfd/main.cc:0818] << process_event I will send you the full trace file privately. Thanks Gary On 06/07/17 14:42, praveen malviya wrote: Hi Gary, When SC joins back after SC Absence state, there will not be many messages in the queue to be processed even in large cluster also. Also there is no error like ipc send failure. Is AMFD stuck somewhere and it could not take any message from queue? I think raising priority will not solve problem in such a situation. 
Thanks Praveen On 23-Jun-17 7:18 AM, Gary Lee wrote: --- src/amf/amfd/ndmsg.cc | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/amf/amfd/ndmsg.cc b/src/amf/amfd/ndmsg.cc index 11bc8ac35..9bfab6fd8 100644 --- a/src/amf/amfd/ndmsg.cc +++ b/src/amf/amfd/ndmsg.cc @@ -371,7 +371,12 @@ uint32_t avd_n2d_msg_rcv(AVD_DND_MSG *rcv_msg, NODE_ID node_id, evt->info.avnd_msg = rcv_msg; - if (m_NCS_IPC_SEND(&cb->avd_mbx, evt, NCS_IPC_PRIORITY_HIGH) != + NCS_IPC_PRIORITY priority = NCS_IPC_PRIORITY_HIGH; + if (evt->rcv_evt == AVD_EVT_NODE_UP_MSG) { +priority = NCS_IPC_PRIORITY_VERY_HIGH; + } + + if (m_NCS_IPC_SEND(&cb->avd_mbx, evt, priority) != NCSCC_RC_SUCCESS) { LOG_ER("%s: ncs_ipc_send failed", __FUNCTION__); avsv_dnd_msg_free(rcv_msg); -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfd: increase msg priority for node ups [#2510]
Hi Gary, When SC joins back after SC Absence state, there will not be many messages in the queue to be processed even in large cluster also. Also there is no error like ipc send failure. Is AMFD stuck somewhere and it could not take any message from queue? I think raising priority will not solve problem in such a situation. Thanks Praveen On 23-Jun-17 7:18 AM, Gary Lee wrote: --- src/amf/amfd/ndmsg.cc | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/amf/amfd/ndmsg.cc b/src/amf/amfd/ndmsg.cc index 11bc8ac35..9bfab6fd8 100644 --- a/src/amf/amfd/ndmsg.cc +++ b/src/amf/amfd/ndmsg.cc @@ -371,7 +371,12 @@ uint32_t avd_n2d_msg_rcv(AVD_DND_MSG *rcv_msg, NODE_ID node_id, evt->info.avnd_msg = rcv_msg; - if (m_NCS_IPC_SEND(&cb->avd_mbx, evt, NCS_IPC_PRIORITY_HIGH) != + NCS_IPC_PRIORITY priority = NCS_IPC_PRIORITY_HIGH; + if (evt->rcv_evt == AVD_EVT_NODE_UP_MSG) { +priority = NCS_IPC_PRIORITY_VERY_HIGH; + } + + if (m_NCS_IPC_SEND(&cb->avd_mbx, evt, priority) != NCSCC_RC_SUCCESS) { LOG_ER("%s: ncs_ipc_send failed", __FUNCTION__); avsv_dnd_msg_free(rcv_msg); -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntfd: Do not stop ntfimcn if surveillance thread has not started [#2508]
Ack. Thanks, Praveen On 05-Jul-17 2:49 PM, Minh Chau wrote: ntfd can sometimes receive SIG_TERM while opensafd is stopping, even though the surveillance thread and ntfimcnd have not been started. The patch prevents stopping the surveillance thread and ntfimcnd in such a case. --- src/ntf/ntfd/ntfs_imcnutil.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/ntf/ntfd/ntfs_imcnutil.c b/src/ntf/ntfd/ntfs_imcnutil.c index cc257703b..00c2c0039 100644 --- a/src/ntf/ntfd/ntfs_imcnutil.c +++ b/src/ntf/ntfd/ntfs_imcnutil.c @@ -48,7 +48,7 @@ typedef struct { bool ntfimcn_on; } init_params_t; -static init_params_t ipar; +static init_params_t ipar = {0, 0, 0, false}; pthread_mutex_t ntfimcn_mutex; /** @@ -357,13 +357,12 @@ int stop_ntfimcn(void) int rc = 0; TRACE_ENTER(); + if (ipar.ntfimcn_on == false) goto done; /* Kill ntfimcn */ osaf_mutex_lock_ordie(&ntfimcn_mutex); - if (ipar.ntfimcn_on == true) { - ipar.ntfimcn_on = false; - TRACE("%s: Terminating osafntfimcnd process", __FUNCTION__); - timedwait_imcn_exit(); - } + ipar.ntfimcn_on = false; + TRACE("%s: Terminating osafntfimcnd process", __FUNCTION__); + timedwait_imcn_exit(); osaf_mutex_unlock_ordie(&ntfimcn_mutex); /* Cancel the surveillance thread */ @@ -377,7 +376,7 @@ int stop_ntfimcn(void) rc = pthread_mutex_destroy(&ntfimcn_mutex); if (rc != 0) osaf_abort(rc); - +done: TRACE_LEAVE(); return rc; } -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntfd: Ensure mutex is not taken after cnsurvail_thread is canceled V2 [#2508]
Ack, code review only. Thanks Praveen On 03-Jul-17 6:04 AM, minh chau wrote: Hi Lennart, Praveen If we don't have any comments, I would like to push the patch today Thanks, Minh On 28/06/17 14:32, Minh Chau wrote: In the scenario of shutting down SC while SC switchover is ongoing, ntfd coredump is generated due to failure of pthread_mutex_destroy() with errorcode:16(EBUSY). That means the mutex had been taken and was not unlocked at the time pthread_mutex_destroy() is called. This patch changes the way ntfd stops ntfimcn and cnsurvail_thread() so that the cnsurvail_thread does not restart ntfimcn in stop sequence. Therefore, when cnsurvail_thread receives cancellation request, this thread does not do anything that may lead to cancellation point with a locked mutex. --- src/ntf/ntfd/ntfs_imcnutil.c | 58 +--- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/src/ntf/ntfd/ntfs_imcnutil.c b/src/ntf/ntfd/ntfs_imcnutil.c index dd27a255c..cc257703b 100644 --- a/src/ntf/ntfd/ntfs_imcnutil.c +++ b/src/ntf/ntfd/ntfs_imcnutil.c @@ -44,6 +44,8 @@ typedef struct { SaAmfHAStateT ha_state; pid_t pid; pthread_t thread; +/* ntfimcn functionality: true(enabled), false(disabled) */ +bool ntfimcn_on; } init_params_t; static init_params_t ipar; @@ -240,8 +242,17 @@ static void *cnsurvail_thread(void *_init_params) while (1) { osaf_mutex_lock_ordie(&ntfimcn_mutex); -pid = create_imcnprocess(ipar->ha_state); -ipar->pid = pid; +/* Only start ntfimcn process if this functionality is + * enabled, this is to avoid restarting ntfimcn when ntfd + * receives SIGTERM (shutting down) + * NOTE: Do not add any code outside below *if @ntfimcn_on* + * block that may lead to a thread cancellation point while + * ntfimcn_mutex is being locked + */ +if (ipar->ntfimcn_on == true) { +pid = create_imcnprocess(ipar->ha_state); +ipar->pid = pid; +} osaf_mutex_unlock_ordie(&ntfimcn_mutex); /* Wait for child process to exit */ @@ -271,7 +282,8 @@ static void *cnsurvail_thread(void *_init_params) 
/** * Start the imcn process surveillance thread - * + * When surveillance thread is running, this thread + * will start and monitor ntfimcn process in cnsurvail_thread() * @param ha_state[in] */ static void start_cnprocess(SaAmfHAStateT ha_state) @@ -285,7 +297,8 @@ static void start_cnprocess(SaAmfHAStateT ha_state) osaf_abort(rc); ipar.ha_state = ha_state; - +ipar.ntfimcn_on = true; +ipar.pid = 0; rc = pthread_create(&ipar.thread, NULL, cnsurvail_thread, (void *)&ipar); if (rc != 0) @@ -330,33 +343,40 @@ void handle_state_ntfimcn(SaAmfHAStateT ha_state) } /** - * Cancel the surveillance trhead and kill the imcn process. + * This function stops functionality of ntfimcn by: + * First: Kill imcn process + * Second: Cancel the surveillance thread + * (in reversed order of start ntfimcn) * Use the pid and thread id saved when the process was started * This will terminate the process permanently. * - * @return -1 if error + * @return 0 if success, abort() on any error */ int stop_ntfimcn(void) { -void *join_ret; int rc = 0; TRACE_ENTER(); -if (ipar.ha_state != 0) { -TRACE("%s: Cancel the imcn surveillance thread", __FUNCTION__); -rc = pthread_cancel(ipar.thread); -if (rc != 0) -osaf_abort(rc); -rc = pthread_join(ipar.thread, &join_ret); -if (rc != 0) -osaf_abort(rc); -rc = pthread_mutex_destroy(&ntfimcn_mutex); -if (rc != 0) -osaf_abort(rc); - +/* Kill ntfimcn */ +osaf_mutex_lock_ordie(&ntfimcn_mutex); +if (ipar.ntfimcn_on == true) { +ipar.ntfimcn_on = false; TRACE("%s: Terminating osafntfimcnd process", __FUNCTION__); timedwait_imcn_exit(); } +osaf_mutex_unlock_ordie(&ntfimcn_mutex); + +/* Cancel the surveillance thread */ +TRACE("%s: Cancel the imcn surveillance thread", __FUNCTION__); +rc = pthread_cancel(ipar.thread); +if (rc != 0) +osaf_abort(rc); +rc = pthread_join(ipar.thread, NULL); +if (rc != 0) +osaf_abort(rc); +rc = pthread_mutex_destroy(&ntfimcn_mutex); +if (rc != 0) +osaf_abort(rc); TRACE_LEAVE(); return rc; -- Check out the vibrant tech community on one 
of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 2/3] amf: support SC status change callback, non SAF [#2475]
Hi Gary, Thanks for reviewing the patches. Please see inline with [Praveen]. Thanks, Praveen On 28-Jun-17 12:56 PM, Gary Lee wrote: I think currently the callback is called in the context of the agent MDS thread? This could introduce threading issues to an application as they normally expect callbacks to be executed in the thread calling Dispatch(). [Praveen] Yes, callback will be invoked in the context of MDS thread. But in AMFA, it is protected by a lock. So there will not be a deadlock situation. I had also thought of invoking the callback from Dispatch(), but I considered the following points for not going for that: 1) An application will have to call two extra APIs SelectionObjectGet() and Dispatch(). With invocation from Dispatch(), any AMF application which is already an AMF component will easily get integrated. At the same time, invoking the callback without Dispatch() makes it easier to integrate with applications which do not want to become AMF components. 2) No new SAF version. I think what can be done is one more API for installation of the callback which will take SaAmfHandle as input. Also it can still be handled without upgrading SAF version: SaAisErrorT osafAmfInstallSCStatusChangeCallback_2(SaAmfHandleT amfHandle, void (*OsafAmfSCStatusChangeCallbackT)(OsafAmfSCStatusT status)); If an application uses this API to install callback, then AMF will post callback in the mailbox associated with the passed handle to be processed from Dispatch(). This will be useful for applications which are AMF components. Non AMF component applications can use already published API. Attached is the patch 2475_4.patch which introduces this API and also changes the current demo (I will write a new demo for this). With these two installation/registration APIs a user has the flexibility of using the callback. Note: There are some TODOs in 2475_4.patch. Also I will refine the patch 4. 
Thanks, Praveen On 28/6/17, 5:16 pm, "Gary Lee" wrote: Hi Praveen Is it possible to make the callback to be triggered from saAmfDispatch()? Thanks Gary On 28/6/17, 5:06 pm, "Gary Lee" wrote: Hi Praveen Ack for the series (with 2 comments). One very minor comment below, the other is in Patch 3. Thanks Gary On 1/6/17, 9:46 pm, "Praveen" wrote: * A callback that will be invoked by AMFA whenever a SC joins cluster and both SCs leaves cluster if SC Absence feature is enabled. Callback and its argument: void (*OsafAmfSCStatusChangeCallbackT)(OsafAmfSCStatusT state) where OsafAmfSCStatusT is defined as: typedef enum { OSAF_AMF_SC_PRESENT = 1, OSAF_AMF_SC_ABSENT = 2, } OsafAmfSCStatusT; This callback can be integrated with standard AMF component (even with legacy one also). * An API to register/install above callback function: void osafAmfInstallSCStatusChangeCallback( void (*OsafAmfSCStatusChangeCallbackT)(OsafAmfSCStatusT status)); --- src/amf/agent/ava_mds.cc | 35 ++- src/amf/agent/ava_mds.h | 6 -- src/amf/agent/ava_op.cc | 5 + src/amf/amfnd/avnd_mds.h | 2 +- src/amf/amfnd/di.cc | 39 ++- src/amf/amfnd/mds.cc | 5 - src/amf/common/amf_amfparam.h | 7 +++ src/amf/common/amf_n2avamsg.h | 1 + src/amf/saf/libSaAmf.map | 1 + src/amf/saf/saAmf_B_04_02.h | 8 10 files changed, 103 insertions(+), 6 deletions(-) diff --git a/src/amf/agent/ava_mds.cc b/src/amf/agent/ava_mds.cc index 54a0cfa..42bc139 100644 --- a/src/amf/agent/ava_mds.cc +++ b/src/amf/agent/ava_mds.cc @@ -60,7 +60,8 @@ static uint32_t dec_csi_attr_change_msg(NCS_UBAID *uba, AVSV_NDA_AVA_MSG *msg); static const MDS_CLIENT_MSG_FORMAT_VER ava_avnd_msg_fmt_map_table[AVA_AVND_SUBPART_VER_MAX] = { -AVSV_AVND_AVA_MSG_FMT_VER_1, AVSV_AVND_AVA_MSG_FMT_VER_2}; +AVSV_AVND_AVA_MSG_FMT_VER_1, AVSV_AVND_AVA_MSG_FMT_VER_2, +AVSV_AVND_AVA_MSG_FMT_VER_3}; /** * function called when MDS down for avnd (AMF) is received @@ -68,6 +69,26 @@ static const MDS_CLIENT_MSG_FORMAT_VER */ static void (*amf_down_cb)(void); +/** + * @brief SC 
status chan
Re: [devel] [PATCH 0/5] Review Request for ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506]
Hi Lennart, NTFIMCN initializes with IMM with SAF version A.02.12. IMM integration with CLM is done in last release and for that SAF version is A.02.18 (src/imm/README mentions it). Like any legacy application, NTFIMCN should not get ERR_UNAVAILABLE from IMM for any API call. Thanks, Praveen On 28-Jun-17 8:48 PM, Lennart Lund wrote: Summary: ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506] Review request for Ticket(s): 2506 Peer Reviewer(s): praveen.malv...@oracle.com; minh.c...@dektech.com.au Pull request to: *** LIST THE PERSON WITH PUSH ACCESS HERE *** Affected branch(es): develop Development branch: ticket-2506 Base revision: f089f030a322a43c79f3f259f07a4c42bb4d0da1 Personal repository: git://git.code.sf.net/u/elunlen/review Impacted area Impact y/n Docsn Build systemn RPM/packaging n Configuration files n Startup scripts n SAF servicesy OpenSAF servicesn Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): - *** EXPLAIN/COMMENT THE PATCH SERIES HERE *** revision beae5c3dacaa05fe68b50433251947d8c045cca7 Author: Lennart Lund Date: Wed, 28 Jun 2017 17:09:23 +0200 ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506] Changed according to comment from Minh. OM Handle is now initialized only when imcn process start and is reinitialized if SA_AIS_ERR_UNAVAILABLE Also comment about resource handling in case of error in get_rdn_attr_name() revision 7c3bc31dda6099becf7f246093d0bd7b0d652340 Author: Lennart Lund Date: Wed, 28 Jun 2017 16:49:04 +0200 ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506] Changed according to comment from Minh. 
OM Handle is now initialized only when imcn process start and is reinitialized if SA_AIS_ERR_UNAVAILABLE Also comment about resource handling in case of error in get_rdn_attr_name() long_description revision caa854ce873f90208a5a14894b51c997e25a924c Author: Lennart Lund Date: Wed, 28 Jun 2017 16:49:04 +0200 ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506] In ntfimcn the OM handle shall have a short lifespan. Change from creating a handle once when ntfimcn process starts to create a handle each time it is needed and finalize when no longer needed. revision 0db3b249ffa8d00b05cc7aefecb1d422c30c8faf Author: Lennart Lund Date: Wed, 28 Jun 2017 16:49:04 +0200 ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506] In ntfimcn the OM handle shall have a short lifespan. Change from creating a handle once when ntfimcn process starts to create a handle each time it is needed and finalize when no longer needed. Change start handling of ntfimcn (in ntf process) so the ntfimcn process is started on the active node only since the ntfimcn process is not doing anything on the standby node. Refactor/simplify code accordingly. revision 71763df94b6d58d6e553fa26cc41dbd7cb7d264a Author: Lennart Lund Date: Tue, 27 Jun 2017 16:05:44 +0200 ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506] In ntfimcn the OM handle shall have a short lifespan. Change from creating a handle once when ntfimcn process starts to create a handle each time it is needed and finalize when no longer needed. revision 8f43f5d28b282812926d5a5bad29e604c76e4697 Author: Lennart Lund Date: Mon, 26 Jun 2017 14:03:38 +0200 ntf: ntfimcn does not handle SA_ERR_UNAVAILABLE [#2506] In ntfimcn the OM handle shall have a short lifespan. Change from creating a handle once when ntfimcn process starts to create a handle each time it is needed and finalize when no longer needed. 
Change start handling of ntfimcn (in ntf process) so the ntfimcn process is started on the active node only since the ntfimcn process is not doing anything on the standby node. Refactor/simplify code accordingly. Complete diffstat: -- src/ntf/ntfimcnd/ntfimcn_imm.c | 205 src/ntf/ntfimcnd/ntfimcn_imm.h | 11 ++- src/ntf/ntfimcnd/ntfimcn_main.h | 2 +- 3 files changed, 158 insertions(+), 60 deletions(-) Testing Commands: - *** LIST THE COMMAND LINE TOOLS/STEPS TO TEST YOUR CHANGES *** Testing, Expected Results: -- *** PASTE COMMAND OUTPUTS / TEST RESULTS *** Conditions of Submission: - *** HOW MANY DAYS BEFORE PUSHING, CONSENSUS ETC *** Arch Built StartedLinux distro --- mipsn n mips64 n n x86 n n x86_64 n n powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries
Re: [devel] [PATCH 1/1] amfd: Do not log warning when create (or delete) a existed(or nonexisted) SUSI [#2467]
Ack, not tested. Thanks, Praveen On 27-Jun-17 2:17 PM, minh chau wrote: Hi Praveen, I had the same thought as you, but in the case of lock then unlock, Create_sync (from unlock) gets ERR_EXIST because the previous Delete_sync (from lock) first fails and is still in the queue, so then we will not create the object. This patch is not meant to change any logic; it just makes the log less confusing. Thanks, Minh On 27/06/17 18:33, praveen malviya wrote: Hi Minh, One comment inline with [ Praveen]. Thanks Praveen On 25-May-17 12:53 PM, Minh Chau wrote: --- src/amf/amfd/imm.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc index 7b1aa333e..26faffcb5 100644 --- a/src/amf/amfd/imm.cc +++ b/src/amf/amfd/imm.cc @@ -1893,7 +1893,7 @@ void avd_saImmOiRtObjectCreate_sync(const std::string &className, rc = saImmOiRtObjectCreate_2(avd_cb->immOiHandle, const_cast(className.c_str()), parent_name, attrValues); -if (rc != SA_AIS_OK) { +if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) { LOG_WA("saImmOiRtObjectCreate_2 of className:'%s', parentName:'%s'," " failed with %u", className.c_str(), parentName.c_str(), rc); } [Praveen] if return code is ERR_EXIST, then it means RT object exists in IMM. In such a situation second if block in this function should not push it in job queue. @@ -1946,7 +1946,7 @@ void avd_saImmOiRtObjectDelete_sync(const std::string &dn) { if (isImmReady == true) { rc = saImmOiRtObjectDelete_o3(avd_cb->immOiHandle, dn.c_str()); -if (rc != SA_AIS_OK) { +if (rc != SA_AIS_OK && rc != SA_AIS_ERR_NOT_EXIST) { LOG_WA("saImmOiRtObjectDelete_o3 of '%s' failed with %u", dn.c_str(), rc); } } -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] clmtest: update non-member node_id for new test environment [#2512]
Ack. Thanks, Praveen On 27-Jun-17 3:09 PM, Hoang Vo wrote: clmtest 7 7 check saClmClusterNodeGet with non-member node, previously designed as 0x2060F. change test node_id to 0x2990F to avoid future conflict when test environment is upgraded. --- src/clm/apitest/tet_saClmClusterNodeGet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/clm/apitest/tet_saClmClusterNodeGet.c b/src/clm/apitest/tet_saClmClusterNodeGet.c index 51683f9..c26939d 100644 --- a/src/clm/apitest/tet_saClmClusterNodeGet.c +++ b/src/clm/apitest/tet_saClmClusterNodeGet.c @@ -126,7 +126,7 @@ void saClmClusterNodeGet_06(void) void saClmClusterNodeGet_07(void) { - nodeId = 132623; /*node is non member*/ + nodeId = 170255; /*node is non member, 0x2990F*/ safassert(saClmInitialize(&clmHandle, &clmCallbacks_1, &clmVersion_1), SA_AIS_OK); rc = saClmClusterNodeGet(clmHandle, nodeId, timeout, &clusterNode_1); @@ -134,7 +134,7 @@ void saClmClusterNodeGet_07(void) /*test_validate(rc, SA_AIS_ERR_UNAVAILABLE);*/ test_validate(rc, SA_AIS_ERR_NOT_EXIST); - nodeId = 132623; + nodeId = 170255; safassert(saClmInitialize_4(&clmHandle, &clmCallbacks_4, &clmVersion_4), SA_AIS_OK); rc = saClmClusterNodeGet_4(clmHandle, nodeId, timeout, &clusterNode_4); -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfnd: retry on ERR_NOT_EXIST [#2490]
Ack, code review only. Thanks Praveen On 28-Jun-17 6:48 AM, Gary Lee wrote: Hi I would like to push this patch on Friday if there are no objections. For the medium term, we should consider making AMFND an applier to avoid these issues? Thanks Gary On 15/6/17, 5:41 pm, "Gary Lee" wrote: On a congested network, sometimes a newly created IMM object can take some time to be available on other nodes. In our test, a new SU is created on SC-1 and unlocked. But sometimes it fails on a remote node due to: 2017-05-19 13:55:19 SC-2 osafamfnd[258]: ER amf_saImmOmSearchInitialize_o2 failed: 12 To get around this, we will retry on SA_AIS_ERR_NOT_EXIST a few times. --- src/amf/amfnd/util.cc | 15 +++ 1 file changed, 15 insertions(+) diff --git a/src/amf/amfnd/util.cc b/src/amf/amfnd/util.cc index ed0905ce2..bca642eac 100644 --- a/src/amf/amfnd/util.cc +++ b/src/amf/amfnd/util.cc @@ -38,6 +38,9 @@ #include #include "osaf/configmake.h" #include "amf/amfnd/avnd.h" +#include "base/osaf_time.h" + +extern struct ImmutilWrapperProfile immutilWrapperProfile; const char *presence_state[] = { "OUT_OF_RANGE", "UNINSTANTIATED", "INSTANTIATING", @@ -335,6 +338,18 @@ SaAisErrorT amf_saImmOmSearchInitialize_o2( scope, searchOptions, searchParam, attributeNames, &searchHandle); } + } else if (rc == SA_AIS_ERR_NOT_EXIST) { +// it is possible for 'rootName' to be not yet available +// at the local immnd. Retry a few times to allow CCB to be propagated. +unsigned int nTries = 1; +while (rc == SA_AIS_ERR_NOT_EXIST && + nTries < immutilWrapperProfile.nTries) { + osaf_nanosleep(&kHundredMilliseconds); + rc = immutil_saImmOmSearchInitialize_o2(immHandle, rootName.c_str(), +scope, searchOptions, searchParam, +attributeNames, &searchHandle); + nTries++; +} } return rc; } -- 2.11.0 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! 
http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfd: Do not log warning when create (or delete) a existed(or nonexisted) SUSI [#2467]
Hi Minh, One comment inline with [ Praveen]. Thanks Praveen On 25-May-17 12:53 PM, Minh Chau wrote: --- src/amf/amfd/imm.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc index 7b1aa333e..26faffcb5 100644 --- a/src/amf/amfd/imm.cc +++ b/src/amf/amfd/imm.cc @@ -1893,7 +1893,7 @@ void avd_saImmOiRtObjectCreate_sync(const std::string &className, rc = saImmOiRtObjectCreate_2(avd_cb->immOiHandle, const_cast(className.c_str()), parent_name, attrValues); -if (rc != SA_AIS_OK) { +if (rc != SA_AIS_OK && rc != SA_AIS_ERR_EXIST) { LOG_WA("saImmOiRtObjectCreate_2 of className:'%s', parentName:'%s'," " failed with %u", className.c_str(), parentName.c_str(), rc); } [Praveen] if return code is ERR_EXIST, then it means RT object exists in IMM. In such a situation second if block in this function should not push it in job queue. @@ -1946,7 +1946,7 @@ void avd_saImmOiRtObjectDelete_sync(const std::string &dn) { if (isImmReady == true) { rc = saImmOiRtObjectDelete_o3(avd_cb->immOiHandle, dn.c_str()); -if (rc != SA_AIS_OK) { +if (rc != SA_AIS_OK && rc != SA_AIS_ERR_NOT_EXIST) { LOG_WA("saImmOiRtObjectDelete_o3 of '%s' failed with %u", dn.c_str(), rc); } } -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505]
If rdegetrole does not exist (on payload), then the else block will also be executed, and the else block is for payload. So the present patch will always work. Thanks, Praveen On 27-Jun-17 12:50 PM, Lennart Lund wrote: Hi Praveen I removed the check if rdegetrole exist on the node but I think it is a good idea to still have this check. This means that the check if we are on a payload node should be to first check if rdegetrole exist and if it does then check if the return code is Fail. Thanks Lennart -Original Message- From: praveen malviya [mailto:praveen.malv...@oracle.com] Sent: den 27 juni 2017 08:29 To: Lennart Lund Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505] Ack. Thanks Praveen On 26-Jun-17 8:35 PM, Lennart Lund wrote: Fix incorrect detection of node type and misleading information to the user of ntftest --- src/ntf/apitest/tet_ntf_clm.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/ntf/apitest/tet_ntf_clm.c b/src/ntf/apitest/tet_ntf_clm.c index 5b1d8c6..0f2c7d0 100644 --- a/src/ntf/apitest/tet_ntf_clm.c +++ b/src/ntf/apitest/tet_ntf_clm.c @@ -444,13 +444,13 @@ __attribute__((constructor)) static void ntf_clm_constructor(void) // printf("lock_cmd:'%s'\n",lock_cmd); // printf("unlock_cmd:'%s'\n",unlock_cmd); - // Add these test cases on other than active controller. + // The following tests are added only if not running on an Active + // controller node int rc = 0; char role[80]; - rc = system("which rdegetrole"); + rc = system("rdegetrole"); if (rc == 0) { - printf("This is a controller node\n"); - // Command rdegetrole exists means a controller. + // Command rdegetrole returning OK means controller node. 
memset(buffer, '\0', sizeof(buffer)); memset(role, '\0', sizeof(role)); strcpy(buffer, "rdegetrole"); @@ -459,14 +459,16 @@ __attribute__((constructor)) static void ntf_clm_constructor(void) if ((ptr = strchr(role, '\n')) != NULL) *ptr = '\0'; if (!strcmp((char *)role, "ACTIVE")) { - // printf("Active controller node\n"); + printf("Active controller node. " + "Do not run CLM tests\n"); pclose(fp); return; } + printf("Standby controller node. Run CLM tests\n"); } pclose(fp); } else { - printf("This is a payload node\n"); + printf("Payload node. Run CLM tests\n"); } test_suite_add(40, "Ntf CLM Integration test suite\n"); -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntf: Test cases fail on SC nodes [#2505]
Ack. Thanks Praveen On 26-Jun-17 8:35 PM, Lennart Lund wrote: Fix incorrect detection of node type and misleading information to the user of ntftest --- src/ntf/apitest/tet_ntf_clm.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/ntf/apitest/tet_ntf_clm.c b/src/ntf/apitest/tet_ntf_clm.c index 5b1d8c6..0f2c7d0 100644 --- a/src/ntf/apitest/tet_ntf_clm.c +++ b/src/ntf/apitest/tet_ntf_clm.c @@ -444,13 +444,13 @@ __attribute__((constructor)) static void ntf_clm_constructor(void) // printf("lock_cmd:'%s'\n",lock_cmd); // printf("unlock_cmd:'%s'\n",unlock_cmd); - // Add these test cases on other than active controller. + // The following tests are added only if not running on an Active + // controller node int rc = 0; char role[80]; - rc = system("which rdegetrole"); + rc = system("rdegetrole"); if (rc == 0) { - printf("This is a controller node\n"); - // Command rdegetrole exists means a controller. + // Command rdegetrole returning OK means controller node. memset(buffer, '\0', sizeof(buffer)); memset(role, '\0', sizeof(role)); strcpy(buffer, "rdegetrole"); @@ -459,14 +459,16 @@ __attribute__((constructor)) static void ntf_clm_constructor(void) if ((ptr = strchr(role, '\n')) != NULL) *ptr = '\0'; if (!strcmp((char *)role, "ACTIVE")) { - // printf("Active controller node\n"); + printf("Active controller node. " + "Do not run CLM tests\n"); pclose(fp); return; } + printf("Standby controller node. Run CLM tests\n"); } pclose(fp); } else { - printf("This is a payload node\n"); + printf("Payload node. Run CLM tests\n"); } test_suite_add(40, "Ntf CLM Integration test suite\n"); -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfd: Accept ERR_NOT_EXIST on stopping track callback [#2469]
Ack, code review only. Thanks, Praveen On 26-Jun-17 9:25 AM, Minh Chau wrote: During switchover, standby amfd tries to stop clm tracking, amfd first got ERR_TIMEOUT and second tries got ERR_NOT_EXIST. In CLM spec, ERR_TIMEOUT return means the stop clm tracking may or may not be successful. If the first call doesn't succeed, the second call will be OK. In the scope of this ticket, the first already succeeded, therefore amfd got ERR_NOT_EXIST. Note that ERR_NOT_EXIST doesn't mean that the CLM handle is invalid (or BAD HANDLE), thus this error code can be ignored since standby amfd doesn't need to track clm anymore. --- src/amf/amfd/clm.cc | 4 src/amf/amfd/role.cc | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc index 4f69d4a58..1b451e9e7 100644 --- a/src/amf/amfd/clm.cc +++ b/src/amf/amfd/clm.cc @@ -495,6 +495,10 @@ SaAisErrorT avd_clm_track_stop(void) { if (error == SA_AIS_ERR_TRY_AGAIN || error == SA_AIS_ERR_TIMEOUT || error == SA_AIS_ERR_UNAVAILABLE) { LOG_WA("Failed to stop cluster tracking %u", error); +} else if (error == SA_AIS_ERR_NOT_EXIST) { + /* track changes was not started or stopped successfully */ + LOG_WA("Failed to stop cluster tracking %u", error); + avd_cb->is_clm_track_started = false; } else { LOG_ER("Failed to stop cluster tracking %u", error); } diff --git a/src/amf/amfd/role.cc b/src/amf/amfd/role.cc index 85cde7fb7..ec13c3bd8 100644 --- a/src/amf/amfd/role.cc +++ b/src/amf/amfd/role.cc @@ -1105,7 +1105,7 @@ uint32_t amfd_switch_actv_qsd(AVD_CL_CB *cb) { uint32_t amfd_switch_qsd_stdby(AVD_CL_CB *cb) { uint32_t status = NCSCC_RC_SUCCESS; - + SaAisErrorT ais_rc; TRACE_ENTER(); LOG_NO("Switching Quiesced --> StandBy"); @@ -1139,7 +1139,8 @@ uint32_t amfd_switch_qsd_stdby(AVD_CL_CB *cb) { } if (cb->is_clm_track_started == true) { -if (avd_clm_track_stop() != SA_AIS_OK) { +ais_rc = avd_clm_track_stop(); +if (ais_rc != SA_AIS_OK && ais_rc != SA_AIS_ERR_NOT_EXIST) { LOG_ER("Failed to 
stop cluster tracking after switch over"); } } -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfnd: retry on ERR_NOT_EXIST [#2490]
Hi Gary, Is there any ticket in IMM for any related issue? Both creation of SU and admin operation on it will go through IMM only. I do not know how IMM handles it and how IMM spec talks about it. Does IMM spec allow admin operation on a entity for which all synced IMMNDS (leaving non synced IMMNDS on the nodes joining the cluster) are not updated? Thanks, Praveen On 15-Jun-17 1:11 PM, Gary Lee wrote: On a congested network, sometimes a newly created IMM object can take some time to be available on other nodes. In our test, a new SU is created on SC-1 and unlocked. But sometimes it fails on a remote node due to: 2017-05-19 13:55:19 SC-2 osafamfnd[258]: ER amf_saImmOmSearchInitialize_o2 failed: 12 To get around this, we will retry on SA_AIS_ERR_NOT_EXIST a few times. --- src/amf/amfnd/util.cc | 15 +++ 1 file changed, 15 insertions(+) diff --git a/src/amf/amfnd/util.cc b/src/amf/amfnd/util.cc index ed0905ce2..bca642eac 100644 --- a/src/amf/amfnd/util.cc +++ b/src/amf/amfnd/util.cc @@ -38,6 +38,9 @@ #include #include "osaf/configmake.h" #include "amf/amfnd/avnd.h" +#include "base/osaf_time.h" + +extern struct ImmutilWrapperProfile immutilWrapperProfile; const char *presence_state[] = { "OUT_OF_RANGE", "UNINSTANTIATED", "INSTANTIATING", @@ -335,6 +338,18 @@ SaAisErrorT amf_saImmOmSearchInitialize_o2( scope, searchOptions, searchParam, attributeNames, &searchHandle); } + } else if (rc == SA_AIS_ERR_NOT_EXIST) { +// it is possible for 'rootName' to be not yet available +// at the local immnd. Retry a few times to allow CCB to be propagated. +unsigned int nTries = 1; +while (rc == SA_AIS_ERR_NOT_EXIST && + nTries < immutilWrapperProfile.nTries) { + osaf_nanosleep(&kHundredMilliseconds); + rc = immutil_saImmOmSearchInitialize_o2(immHandle, rootName.c_str(), +scope, searchOptions, searchParam, +attributeNames, &searchHandle); + nTries++; +} } return rc; } -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! 
http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] clmtest: correct independent test cases [#2497]
Ack. Thanks Praveen On 15-Jun-17 2:01 PM, Hoang Vo wrote: clmtest 10 3 and 10 4 does not initialize its data struct but reuse global one lead to failure result when run alone. --- src/clm/apitest/tet_saClmClusterNotificationFree.c | 12 1 file changed, 12 insertions(+) diff --git a/src/clm/apitest/tet_saClmClusterNotificationFree.c b/src/clm/apitest/tet_saClmClusterNotificationFree.c index 5ce002a..22faf15 100644 --- a/src/clm/apitest/tet_saClmClusterNotificationFree.c +++ b/src/clm/apitest/tet_saClmClusterNotificationFree.c @@ -53,6 +53,12 @@ void saClmClusterNotificationFree_02(void) void saClmClusterNotificationFree_03(void) { + notificationBuffer_4.numberOfItems = 1; + notificationBuffer_4.notification = + (SaClmClusterNotificationT_4 *)malloc( + sizeof(SaClmClusterNotificationT_4) * + notificationBuffer_4.numberOfItems); + rc = saClmClusterNotificationFree_4(0, notificationBuffer_4.notification); test_validate(rc, SA_AIS_ERR_BAD_HANDLE); @@ -62,6 +68,12 @@ void saClmClusterNotificationFree_03(void) void saClmClusterNotificationFree_04(void) { + notificationBuffer_4.numberOfItems = 1; + notificationBuffer_4.notification = + (SaClmClusterNotificationT_4 *)malloc( + sizeof(SaClmClusterNotificationT_4) * + notificationBuffer_4.numberOfItems); + rc = saClmClusterNotificationFree_4(-1, notificationBuffer_4.notification); test_validate(rc, SA_AIS_ERR_BAD_HANDLE); -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/3] amfnd: Refactor AVND_COMP for simpler environment variable handling [#1945]
Ack for the series. Thanks PRaveen On 13-Jun-17 4:54 PM, Hans Nordeback wrote: --- src/amf/amfnd/avnd_comp.h | 134 ++ src/amf/amfnd/avnd_err.h | 4 +- src/amf/amfnd/avnd_hc.h | 2 +- src/amf/amfnd/avnd_proc.h | 10 ++-- src/amf/amfnd/avnd_util.h | 2 +- src/amf/amfnd/clc.cc | 125 -- src/amf/amfnd/comp.cc | 35 src/amf/amfnd/compdb.cc | 36 +++-- src/amf/amfnd/proxydb.cc | 2 +- 9 files changed, 138 insertions(+), 212 deletions(-) diff --git a/src/amf/amfnd/avnd_comp.h b/src/amf/amfnd/avnd_comp.h index 611e90e11..68de4cc8e 100644 --- a/src/amf/amfnd/avnd_comp.h +++ b/src/amf/amfnd/avnd_comp.h @@ -31,6 +31,8 @@ #define AMF_AMFND_AVND_COMP_H_ #include +#include +#include struct avnd_cb_tag; struct avnd_su_si_rec; @@ -57,7 +59,7 @@ struct avnd_srm_req_tag; /* clc event handler declaration */ typedef uint32_t (*AVND_COMP_CLC_FSM_FN)(struct avnd_cb_tag *, - struct avnd_comp_tag *); + AVND_COMP *); /* clc fsm events */ typedef enum avnd_comp_clc_pres_fsm_ev { @@ -141,7 +143,7 @@ typedef struct avnd_cbk_tag { AVSV_AMF_CBK_INFO *cbk_info; /* callbk info */ /* link to other elements */ - struct avnd_comp_tag *comp; /* bk ptr to the comp */ + AVND_COMP *comp; /* bk ptr to the comp */ struct avnd_cbk_tag *next; std::string comp_name; /* For checkpointing */ } AVND_COMP_CBK; @@ -182,7 +184,7 @@ typedef struct avnd_comp_csi_rec { wrt prv ha state */ /* links to other entities */ - struct avnd_comp_tag *comp; /* bk ptr to the comp */ + AVND_COMP *comp; /* bk ptr to the comp */ struct avnd_su_si_rec *si; /* bk ptr to the si record */ std::string comp_name; /* For Checkpointing */ std::string si_name;/* For Checkpointing */ @@ -256,7 +258,7 @@ typedef struct avnd_hc_rec_tag { uint32_t opq_hdl; /* hdl returned by hdl-mngr (used during tmr expiry) */ AVND_COMP_HC_STATUS status; /* indicates status of hc rec */ - struct avnd_comp_tag *comp; /* back ptr to the comp */ + AVND_COMP *comp; /* back ptr to the comp */ struct avnd_hc_rec_tag *next; std::string comp_name; /* For checkpoiting */ } 
AVND_COMP_HC_REC; @@ -278,7 +280,7 @@ typedef struct avnd_pm_rec { } rec_rcvr; /* links to other entities */ - struct avnd_comp_tag *comp; /* back ptr to the comp */ + AVND_COMP *comp; /* back ptr to the comp */ } AVND_COMP_PM_REC; /*## @@ -288,7 +290,7 @@ typedef struct avnd_pm_rec { /* proxied info */ typedef struct avnd_pxied_rec { NCS_DB_LINK_LIST_NODE comp_dll_node; /* node in the comp-pxied dll */ - struct avnd_comp_tag *pxied_comp;/* ptr to the proxied comp */ + AVND_COMP *pxied_comp;/* ptr to the proxied comp */ } AVND_COMP_PXIED_REC; #define AVND_COMP_TYPE_LOCAL_NODE 0x0001 @@ -319,100 +321,115 @@ enum UsedComptypeAttrs { NumAttrs }; -typedef struct avnd_comp_tag { - NCS_DB_LINK_LIST_NODE su_dll_node; /* su dll node (key is inst-level) */ +class AVND_COMP { + public: + // TODO(uabhano) replace the NCS_DB_LINK_LIST_NODE with C++ STL. Now su_dll_node must be first in AVND_COMP + // as the macro m_AVND_COMP_SU_DLL_NODE_OFFSET depends on the offset. offsetof is to be avoided in classes. 
+ NCS_DB_LINK_LIST_NODE su_dll_node {}; /* su dll node (key is inst-level) */ + AVND_COMP() {} + ~AVND_COMP() {} std::string name; /* comp name */ std::string saAmfCompType; - uint32_t numOfCompCmdEnv; /* number of comp command environment variables */ - SaStringT *saAmfCompCmdEnv; /* comp command environment variables */ - uint32_t inst_level;/* comp instantiation level */ - uint32_t comp_hdl; /* hdl returned by hdl-mngr */ + uint32_t inst_level {};/* comp instantiation level */ + + uint32_t comp_hdl {}; /* hdl returned by hdl-mngr */ /* component attributes */ - uint32_t flag; /* comp attributes */ - bool is_restart_en; /* flag to indicate if comp-restart is allowed */ - SaAmfCompCapabilityModelT cap; /* comp capability model */ - bool is_am_en; - bool is_hc_cmd_configured; + uint32_t flag {}; /* comp attributes */ + bool is_restart_en {}; /* flag to indicate if comp-restart is allowed */ + SaAmfCompCapabilityModelT cap {}; /* comp capability model */ + bool is_am_en {}; + bool is_hc_cmd_configured {}; /* clc info */ - AVND_COMP_CLC_INFO clc_info; + AVND_COMP_CLC_INFO clc_info {}; /* Update received flag, which will normally be false and will be * true if updates are received from the AVD on fail-over.*/ - bool avd_updt_flag; + bool avd_updt_flag {}; /* compon
Re: [devel] [PATCH 3/3] amfnd: Refactor AVND_COMP for simpler cmd argument handling V2 [#1945]
Hi Hans, One comment on this patch inline with [Praveen]. Thanks, Praveen On 18-May-17 3:32 PM, Hans Nordeback wrote: --- src/amf/amfnd/avnd_comp.h | 71 ++--- src/amf/amfnd/avnd_tmr.h | 6 +-- src/amf/amfnd/cam.cc | 2 +- src/amf/amfnd/chc.cc | 2 +- src/amf/amfnd/clc.cc | 34 -- src/amf/amfnd/comp.cc | 45 ++ src/amf/amfnd/compdb.cc | 113 +++--- src/amf/amfnd/susm.cc | 2 +- 8 files changed, 140 insertions(+), 135 deletions(-) diff --git a/src/amf/amfnd/avnd_comp.h b/src/amf/amfnd/avnd_comp.h index a2fc22691..52bf84e47 100644 --- a/src/amf/amfnd/avnd_comp.h +++ b/src/amf/amfnd/avnd_comp.h @@ -33,11 +33,14 @@ #include #include #include +#include +#include "avnd_tmr.h" struct avnd_cb_tag; struct avnd_su_si_rec; class AVND_SU; struct avnd_srm_req_tag; +class AVND_COMP; /*** ** S T R U C T U R E / E N U M D E F I N I T I O N S *** @@ -90,16 +93,37 @@ typedef enum avnd_comp_clc_cmd_type { } AVND_COMP_CLC_CMD_TYPE; /* clc command parameter definition */ -typedef struct avnd_comp_clc_param { - char cmd[SAAMF_CLC_LEN]; /* cmd ascii string */ - SaTimeT timeout; /* cmd timeout value */ - uint32_t len;/* cmd len */ -} AVND_COMP_CLC_CMD_PARAM; +struct CompClcCmdParam { + void init_clc_cli_command(const char *clc_cmd, char **clc_cmd_argv, +const SaImmAttrValuesT_2 **attributes, +const char *attr_name); + std::string cmd; + std::vector cmd_argv; /* cmd argv */ + SaTimeT timeout;/* cmd timeout value */ +}; /* clc info definition (top level wrapper structure) */ -typedef struct avnd_comp_clc_info { - /* clc commands (indexed by cmd type) */ - AVND_COMP_CLC_CMD_PARAM cmds[AVND_COMP_CLC_CMD_TYPE_MAX - 1]; +struct CompClcInfo { + std::map cmds; + std::string get_cmd(AVND_COMP_CLC_CMD_TYPE cmd_type); + + template + void create_argv(std::array &argv, uint32_t &argc, AVND_COMP_CLC_CMD_TYPE cmd_type) { +argc = 0; + +std::string tmp = saAmfNodeSwBundlePathPrefix + cmds[cmd_type].cmd; +argv[argc++] = strdup(tmp.data()); + +for (auto str : cmds[cmd_type].cmd_argv) { + if (argc >= argv.size()) { 
+LOG_WA("Too many arguments given, max %zu arguments are supported", argv.size()); +break; + } + argv[argc++] = strdup(str.data()); +} + } + + std::string saAmfNodeSwBundlePathPrefix; uint32_t inst_retry_max; /* configured no of instantiate retry attempts */ uint32_t inst_retry_cnt; /* curr no of instantiate retry attempts */ @@ -123,7 +147,7 @@ typedef struct avnd_comp_clc_info { uint32_t inst_code_rcvd; /* Store the error value received from the instantiate script */ -} AVND_COMP_CLC_INFO; +}; /*## COMPONENT CALLBACK DEFINITIONS @@ -341,7 +365,7 @@ class AVND_COMP { bool is_hc_cmd_configured {}; /* clc info */ - AVND_COMP_CLC_INFO clc_info {}; + CompClcInfo clc_info {}; /* Update received flag, which will normally be false and will be * true if updates are received from the AVD on fail-over.*/ @@ -684,33 +708,6 @@ class AVND_COMP { void m_AVND_COMP_OPER_STATE_AVD_SYNC(struct avnd_cb_tag *cb, const AVND_COMP *comp, uint32_t &o_rc); -/* macro to parse the clc cmd string */ -#define m_AVND_COMP_CLC_STR_PARSE(st, sc, ac, av, tav) \ - { \ -char str[SAAMF_CLC_LEN], *tok = nullptr; \ -/* copy the str as strtok modifies the original str */ \ -strcpy(str, st); \ -ac = 0; \ -if (nullptr != (tok = strtok(str, " "))) { \ - strncpy(sc, tok, SAAMF_CLC_LEN - 1); \ - av[ac] = sc; \ -}\ -ac++;\ -while ((nullptr != (tok = strtok(nullptr, " "))) && \ - (ac < (AVND_COMP_CLC_PARAM_MAX + 1))) { \ - if (strlen(tok) > AVND_COMP_CLC_PARAM_SIZE_MAX) break; \ - strcpy(tav[ac], tok); \ - av[ac] = tav[ac]; \ - ac++; \ -}\ -if (nullptr != tok) {\ - sc[0] = (char)(long)nullptr; \ - av[0] = nullptr;
Re: [devel] [PATCH 1/3] amfnd: Refactor AVND_COMP for simpler environment variable handling [#1945]
Hi Hans, One comment on this patch inline with [Praveen]. Thanks, Praveen On 18-May-17 3:32 PM, Hans Nordeback wrote: --- src/amf/amfnd/avnd_comp.h | 134 ++ src/amf/amfnd/avnd_err.h | 4 +- src/amf/amfnd/avnd_hc.h | 2 +- src/amf/amfnd/avnd_proc.h | 10 ++-- src/amf/amfnd/avnd_util.h | 2 +- src/amf/amfnd/clc.cc | 125 -- src/amf/amfnd/comp.cc | 35 src/amf/amfnd/compdb.cc | 36 +++-- src/amf/amfnd/proxydb.cc | 2 +- 9 files changed, 138 insertions(+), 212 deletions(-) diff --git a/src/amf/amfnd/avnd_comp.h b/src/amf/amfnd/avnd_comp.h index 611e90e11..68de4cc8e 100644 --- a/src/amf/amfnd/avnd_comp.h +++ b/src/amf/amfnd/avnd_comp.h @@ -31,6 +31,8 @@ #define AMF_AMFND_AVND_COMP_H_ #include +#include +#include struct avnd_cb_tag; struct avnd_su_si_rec; @@ -57,7 +59,7 @@ struct avnd_srm_req_tag; /* clc event handler declaration */ typedef uint32_t (*AVND_COMP_CLC_FSM_FN)(struct avnd_cb_tag *, - struct avnd_comp_tag *); + AVND_COMP *); /* clc fsm events */ typedef enum avnd_comp_clc_pres_fsm_ev { @@ -141,7 +143,7 @@ typedef struct avnd_cbk_tag { AVSV_AMF_CBK_INFO *cbk_info; /* callbk info */ /* link to other elements */ - struct avnd_comp_tag *comp; /* bk ptr to the comp */ + AVND_COMP *comp; /* bk ptr to the comp */ struct avnd_cbk_tag *next; std::string comp_name; /* For checkpointing */ } AVND_COMP_CBK; @@ -182,7 +184,7 @@ typedef struct avnd_comp_csi_rec { wrt prv ha state */ /* links to other entities */ - struct avnd_comp_tag *comp; /* bk ptr to the comp */ + AVND_COMP *comp; /* bk ptr to the comp */ struct avnd_su_si_rec *si; /* bk ptr to the si record */ std::string comp_name; /* For Checkpointing */ std::string si_name;/* For Checkpointing */ @@ -256,7 +258,7 @@ typedef struct avnd_hc_rec_tag { uint32_t opq_hdl; /* hdl returned by hdl-mngr (used during tmr expiry) */ AVND_COMP_HC_STATUS status; /* indicates status of hc rec */ - struct avnd_comp_tag *comp; /* back ptr to the comp */ + AVND_COMP *comp; /* back ptr to the comp */ struct avnd_hc_rec_tag *next; std::string 
comp_name; /* For checkpoiting */ } AVND_COMP_HC_REC; @@ -278,7 +280,7 @@ typedef struct avnd_pm_rec { } rec_rcvr; /* links to other entities */ - struct avnd_comp_tag *comp; /* back ptr to the comp */ + AVND_COMP *comp; /* back ptr to the comp */ } AVND_COMP_PM_REC; /*## @@ -288,7 +290,7 @@ typedef struct avnd_pm_rec { /* proxied info */ typedef struct avnd_pxied_rec { NCS_DB_LINK_LIST_NODE comp_dll_node; /* node in the comp-pxied dll */ - struct avnd_comp_tag *pxied_comp;/* ptr to the proxied comp */ + AVND_COMP *pxied_comp;/* ptr to the proxied comp */ } AVND_COMP_PXIED_REC; #define AVND_COMP_TYPE_LOCAL_NODE 0x0001 @@ -319,100 +321,115 @@ enum UsedComptypeAttrs { NumAttrs }; -typedef struct avnd_comp_tag { - NCS_DB_LINK_LIST_NODE su_dll_node; /* su dll node (key is inst-level) */ +class AVND_COMP { + public: + // TODO(uabhano) replace the NCS_DB_LINK_LIST_NODE with C++ STL. Now su_dll_node must be first in AVND_COMP + // as the macro m_AVND_COMP_SU_DLL_NODE_OFFSET depends on the offset. offsetof is to be avoided in classes. 
+ NCS_DB_LINK_LIST_NODE su_dll_node {}; /* su dll node (key is inst-level) */ + AVND_COMP() {} + ~AVND_COMP() {} std::string name; /* comp name */ std::string saAmfCompType; - uint32_t numOfCompCmdEnv; /* number of comp command environment variables */ - SaStringT *saAmfCompCmdEnv; /* comp command environment variables */ - uint32_t inst_level;/* comp instantiation level */ - uint32_t comp_hdl; /* hdl returned by hdl-mngr */ + uint32_t inst_level {};/* comp instantiation level */ + + uint32_t comp_hdl {}; /* hdl returned by hdl-mngr */ /* component attributes */ - uint32_t flag; /* comp attributes */ - bool is_restart_en; /* flag to indicate if comp-restart is allowed */ - SaAmfCompCapabilityModelT cap; /* comp capability model */ - bool is_am_en; - bool is_hc_cmd_configured; + uint32_t flag {}; /* comp attributes */ + bool is_restart_en {}; /* flag to indicate if comp-restart is allowed */ + SaAmfCompCapabilityModelT cap {}; /* comp capability model */ + bool is_am_en {}; + bool is_hc_cmd_configured {}; /* clc info */ - AVND_COMP_CLC_INFO clc_info; + AVND_COMP_CLC_INFO clc_info {}; /* Update received flag, which will normally be false and will be * true if updates are received from the AVD on fail-over.*/ - bool avd_updt_flag; + b
Re: [devel] [PATCH 1/1] amfnd: Send pending susi response message after restarting component finishes [#2485]
Ack. I think this entire if block can be removed and Restarting macro can be Ored with Assigning macro in if-else block: diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc index 74b33a3..2f8bb3a 100644 --- a/src/amf/amfnd/comp.cc +++ b/src/amf/amfnd/comp.cc @@ -1615,19 +1615,14 @@ uint32_t avnd_comp_csi_assign_done(AVND_CB *cb, AVND_COMP *comp, /* delete any pending cbk rec for csi assignment / removal */ avnd_comp_cbq_csi_rec_del(cb, comp, (csi) ? csi->name : ""); - /* while restarting, we wont use assign all, so csi will not be null */ - if (csi && m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_RESTARTING(csi)) { -m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(csi, - AVND_COMP_CSI_ASSIGN_STATE_ASSIGNED); -goto done; - } if (!csi && m_AVND_COMP_IS_ALL_CSI(comp)) { m_AVND_COMP_ALL_CSI_RESET(comp); } /* mark the csi(s) assigned */ if (csi) { -if (m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_ASSIGNING(csi)) { +if (m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_ASSIGNING(csi) || + m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_RESTARTING(csi)) { m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET( csi, AVND_COMP_CSI_ASSIGN_STATE_ASSIGNED); } Thanks Praveen On 13-Jun-17 8:29 AM, Minh Chau wrote: --- src/amf/amfnd/comp.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc index 9dfe87a12..4693df217 100644 --- a/src/amf/amfnd/comp.cc +++ b/src/amf/amfnd/comp.cc @@ -1621,7 +1621,6 @@ uint32_t avnd_comp_csi_assign_done(AVND_CB *cb, AVND_COMP *comp, if (csi && m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_RESTARTING(csi)) { m_AVND_COMP_CSI_CURR_ASSIGN_STATE_SET(csi, AVND_COMP_CSI_ASSIGN_STATE_ASSIGNED); -goto done; } if (!csi && m_AVND_COMP_IS_ALL_CSI(comp)) { -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] clmd: update saClmNodeCurrAddress and saClmNodeCurrAddressFamily in IMM [#2331]
I have published v2. immlist will show empty for TIPC. Thanks, PRaveen On 01-Jun-17 7:56 PM, Anders Widell wrote: Hi! What is the expected behaviour when using TIPC transport? This is what I got when I tried: Name Type Value(s) safNodeSA_STRING_T safNode=SC-1 saClmNodeLockCallbackTimeout SA_TIME_T 500 (0xba43b7400, Thu Jan 1 01:00:50 1970) saClmNodeIsMember SA_UINT32_T 1 (0x1) saClmNodeInitialViewNumber SA_UINT64_T 4 (0x4) saClmNodeIDSA_UINT32_T 131343 (0x2010f) saClmNodeEESA_NAME_T saClmNodeDisableReboot SA_UINT32_T 0 (0x0) saClmNodeCurrAddressFamily SA_UINT32_T 1 (0x1) saClmNodeCurrAddress SA_STRING_T saClmNodeBootTimeStamp SA_TIME_T 1496326541720341032 (0x14c4050fd33aca28, Thu Jun 1 16:15:41 2017) saClmNodeAdminStateSA_UINT32_T 1 (0x1) saClmNodeAddressFamily SA_UINT32_T saClmNodeAddress SA_STRING_T SaImmAttrImplementerName SA_STRING_T safClmService SaImmAttrClassName SA_STRING_T SaClmNode SaImmAttrAdminOwnerNameSA_STRING_T IMMLOADER The best would be if we could add a SA_CLM_AF_TIPC = 3, but if you don't have the time to do that now it is probably better to keep the attributes empty (as before) when using TIPC. regards, Anders Widell On 05/26/2017 08:54 AM, Praveen wrote: CLM gets ip address and address family from MDS in node up event. When node will join CLM cluster, CLM will update saClmNodeCurrAddress and saClmNodeCurrAddressFamily in IMM. Also changed permission of CLM tool commands. 
--- src/clm/clmd/clms_imm.c | 21 +++-- src/clm/clmd/clms_mds.c | 35 +-- src/clm/tools/clm-adm | 0 src/clm/tools/clm-find | 0 src/clm/tools/clm-state | 0 5 files changed, 44 insertions(+), 12 deletions(-) mode change 100644 => 100755 src/clm/tools/clm-adm mode change 100644 => 100755 src/clm/tools/clm-find mode change 100644 => 100755 src/clm/tools/clm-state diff --git a/src/clm/clmd/clms_imm.c b/src/clm/clmd/clms_imm.c index a363b50..9c7e018 100644 --- a/src/clm/clmd/clms_imm.c +++ b/src/clm/clmd/clms_imm.c @@ -695,15 +695,20 @@ void clms_admin_state_update_rattr(CLMS_CLUSTER_NODE *nd) */ void clms_node_update_rattr(CLMS_CLUSTER_NODE *nd) { -SaImmAttrModificationT_2 attr_Mod[4]; +SaImmAttrModificationT_2 attr_Mod[6]; SaAisErrorT rc; +SaImmAttrValueT address[1]; SaImmAttrValueT attrUpdateValue[] = {&nd->member}; SaImmAttrValueT attrUpdateValue1[] = {&nd->node_id}; SaImmAttrValueT attrUpdateValue2[] = {&nd->boot_time}; SaImmAttrValueT attrUpdateValue3[] = {&nd->init_view}; +SaImmAttrValueT attrUpdateValue4[] = {&nd->node_addr.family}; +address[0] = &nd->node_addr.value; +SaImmAttrValueT attrUpdateValue5[] = {address}; const SaImmAttrModificationT_2 *attrMods[] = { -&attr_Mod[0], &attr_Mod[1], &attr_Mod[2], &attr_Mod[3], NULL}; +&attr_Mod[0], &attr_Mod[1], &attr_Mod[2], &attr_Mod[3], +&attr_Mod[4], &attr_Mod[5], NULL}; CLMS_CLUSTER_NODE *node = NULL; @@ -743,6 +748,18 @@ void clms_node_update_rattr(CLMS_CLUSTER_NODE *nd) attr_Mod[3].modAttr.attrValueType = SA_IMM_ATTR_SAUINT64T; attr_Mod[3].modAttr.attrValues = attrUpdateValue3; +attr_Mod[4].modType = SA_IMM_ATTR_VALUES_REPLACE; +attr_Mod[4].modAttr.attrName = "saClmNodeCurrAddressFamily"; +attr_Mod[4].modAttr.attrValuesNumber = 1; +attr_Mod[4].modAttr.attrValueType = SA_IMM_ATTR_SAUINT32T; +attr_Mod[4].modAttr.attrValues = attrUpdateValue4; + +attr_Mod[5].modType = SA_IMM_ATTR_VALUES_REPLACE; +attr_Mod[5].modAttr.attrName = "saClmNodeCurrAddress"; +attr_Mod[5].modAttr.attrValuesNumber = 1; 
+attr_Mod[5].modAttr.attrValueType = SA_IMM_ATTR_SASTRINGT; +attr_Mod[5].modAttr.attrValues = attrUpdateValue5; + rc = saImmOiRtObjectUpdate_2(clms_cb->immOiHandle, &nd->node_name, attrMods); diff --git a/src/clm/clmd/clms_mds.c b/src/clm/clmd/clms_mds.c index cffcfaf..69982a7 100644 --- a/src/clm/clmd/clms_mds.c +++ b/src/clm/clmd/clms_mds.c @@ -1013,6 +1013,8 @@ static uint32_t clms_mds_node_event(struct ncsmds_callback_info *mds_info) * AF_INET4 before sending it to the CLM clients. */ TRACE("Adding ipinformation to the ip list: %u", node_id); +TRACE("addr_family:%u", mds_info->info.node_evt.addr_family); +TRACE("ip_addr:%s", mds_info->info.n
Re: [devel] [PATCH 1/1] clmtest: correct test case following AIS [#2478]
Ack. Thanks, Praveen On 02-Jun-17 1:01 PM, Hoang Vo wrote: --- src/clm/apitest/tet_saClmClusterNodeGet.c | 8 src/clm/apitest/tet_saClmClusterTrack.c | 4 ++-- src/clm/apitest/tet_saClmSelectionObjectGet.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/clm/apitest/tet_saClmClusterNodeGet.c b/src/clm/apitest/tet_saClmClusterNodeGet.c index 923578e..51683f9 100644 --- a/src/clm/apitest/tet_saClmClusterNodeGet.c +++ b/src/clm/apitest/tet_saClmClusterNodeGet.c @@ -80,14 +80,14 @@ void saClmClusterNodeGet_04(void) SA_AIS_OK); rc = saClmClusterNodeGet(clmHandle, nodeId, 0, &clusterNode_1); safassert(saClmFinalize(clmHandle), SA_AIS_OK); - test_validate(rc, SA_AIS_ERR_TIMEOUT); + test_validate(rc, SA_AIS_OK); nodeId = 131343; safassert(saClmInitialize_4(&clmHandle, &clmCallbacks_4, &clmVersion_4), SA_AIS_OK); rc = saClmClusterNodeGet_4(clmHandle, nodeId, 0, &clusterNode_4); safassert(saClmFinalize(clmHandle), SA_AIS_OK); - test_validate(rc, SA_AIS_ERR_TIMEOUT); + test_validate(rc, SA_AIS_OK); } void saClmClusterNodeGet_05(void) @@ -126,7 +126,7 @@ void saClmClusterNodeGet_06(void) void saClmClusterNodeGet_07(void) { - nodeId = 131855; /*node is non member*/ + nodeId = 132623; /*node is non member*/ safassert(saClmInitialize(&clmHandle, &clmCallbacks_1, &clmVersion_1), SA_AIS_OK); rc = saClmClusterNodeGet(clmHandle, nodeId, timeout, &clusterNode_1); @@ -134,7 +134,7 @@ void saClmClusterNodeGet_07(void) /*test_validate(rc, SA_AIS_ERR_UNAVAILABLE);*/ test_validate(rc, SA_AIS_ERR_NOT_EXIST); - nodeId = 131855; + nodeId = 132623; safassert(saClmInitialize_4(&clmHandle, &clmCallbacks_4, &clmVersion_4), SA_AIS_OK); rc = saClmClusterNodeGet_4(clmHandle, nodeId, timeout, &clusterNode_4); diff --git a/src/clm/apitest/tet_saClmClusterTrack.c b/src/clm/apitest/tet_saClmClusterTrack.c index e62ae24..00a52b4 100644 --- a/src/clm/apitest/tet_saClmClusterTrack.c +++ b/src/clm/apitest/tet_saClmClusterTrack.c @@ -376,7 +376,7 @@ void saClmClusterTrack_08(void) 
clmHandle, notificationBuffer_4.notification), SA_AIS_OK); safassert(saClmFinalize(clmHandle), SA_AIS_OK); - test_validate(rc, SA_AIS_ERR_INVALID_PARAM); + test_validate(rc, SA_AIS_ERR_BAD_HANDLE); trackFlags = (SA_TRACK_CURRENT | SA_TRACK_LOCAL); notificationBuffer_4.numberOfItems = 1; @@ -413,7 +413,7 @@ void saClmClusterTrack_09(void) clmHandle, notificationBuffer_4.notification), SA_AIS_OK); safassert(saClmFinalize(clmHandle), SA_AIS_OK); - test_validate(rc, SA_AIS_ERR_INVALID_PARAM); + test_validate(rc, SA_AIS_ERR_BAD_FLAGS); } void saClmClusterTrack_10(void) diff --git a/src/clm/apitest/tet_saClmSelectionObjectGet.c b/src/clm/apitest/tet_saClmSelectionObjectGet.c index 6767feb..34961c1 100644 --- a/src/clm/apitest/tet_saClmSelectionObjectGet.c +++ b/src/clm/apitest/tet_saClmSelectionObjectGet.c @@ -41,7 +41,7 @@ void saClmSelectionObjectGet_02(void) SA_AIS_OK); rc = saClmSelectionObjectGet(0, &selectionObject); safassert(saClmFinalize(clmHandle), SA_AIS_OK); - test_validate(rc, SA_AIS_ERR_INVALID_PARAM); + test_validate(rc, SA_AIS_ERR_BAD_HANDLE); } void saClmSelectionObjectGet_03(void) -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] mds: clear mds lib valgrind warning [#2474]
ack code review only. One minor comment: Magic no 1024 should be replaced by some #define or constant. Thanks Praveen On 01-Jun-17 2:32 PM, A V Mahesh wrote: --- src/mds/mds_c_api.c | 4 +++- src/mds/mds_c_db.c| 12 ++-- src/mds/mds_dt_tcp.c | 1 + src/mds/mds_dt_tipc.c | 1 + src/mds/mds_log.cc| 3 +++ 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c index a5986c2..f5ba318 100644 --- a/src/mds/mds_c_api.c +++ b/src/mds/mds_c_api.c @@ -1778,7 +1778,7 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, uint32_t status = NCSCC_RC_SUCCESS; NCSMDS_SCOPE_TYPE local_subtn_scope; MDS_VIEW local_subtn_view; - MDS_DEST active_adest; + MDS_DEST active_adest = 0; V_DEST_RL dest_role; bool tmr_running; NCSMDS_CALLBACK_INFO cbinfo; @@ -1964,6 +1964,7 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, present */ char to_adest_details [MDS_MAX_PROCESS_NAME_LEN]; + memset(to_adest_details, 0, MDS_MAX_PROCESS_NAME_LEN); get_subtn_adest_details( m_MDS_GET_PWE_HDL_FROM_SVC_HDL( local_svc_hdl), @@ -4932,6 +4933,7 @@ uint32_t mds_mcm_init(void) /* STEP 1: Initialize MCM-CB. 
*/ gl_mds_mcm_cb = m_MMGR_ALLOC_MCM_CB; + memset(gl_mds_mcm_cb, 0, sizeof(MDS_MCM_CB)); /* VDEST TREE */ memset(&pat_tree_params, 0, sizeof(NCS_PATRICIA_PARAMS)); diff --git a/src/mds/mds_c_db.c b/src/mds/mds_c_db.c index 7d56ad9..46b8eb4 100644 --- a/src/mds/mds_c_db.c +++ b/src/mds/mds_c_db.c @@ -37,12 +37,16 @@ void get_adest_details(MDS_DEST adest, char *adest_details) char *token, *saveptr; struct stat s; uint32_t process_id = 0; - SlotSubslotId slot_subslot_id; + SlotSubslotId slot_subslot_id = 0; char pid_path[1024]; char *pid_name = NULL; char process_name[MDS_MAX_PROCESS_NAME_LEN]; bool remote = false; + memset(adest_details, 0, MDS_MAX_PROCESS_NAME_LEN); + memset(process_name, 0, MDS_MAX_PROCESS_NAME_LEN); + memset(pid_path, 0, 1024); + slot_subslot_id = GetSlotSubslotIdFromNodeId(m_NCS_NODE_ID_FROM_MDS_DEST(adest)); @@ -139,7 +143,7 @@ void get_subtn_adest_details(MDS_PWE_HDL pwe_hdl, MDS_SVC_ID svc_id, MDS_DEST adest, char *adest_details) { uint32_t process_id = 0; - SlotSubslotId slot_subslot_id; + SlotSubslotId slot_subslot_id = 0; char process_name[MDS_MAX_PROCESS_NAME_LEN]; bool remote = false; MDS_SVC_INFO *svc_info = NULL; @@ -148,6 +152,9 @@ void get_subtn_adest_details(MDS_PWE_HDL pwe_hdl, MDS_SVC_ID svc_id, char *pid_name = NULL; struct stat s; + memset(process_name, 0, MDS_MAX_PROCESS_NAME_LEN); + memset(pid_path, 0, 1024); + slot_subslot_id = GetSlotSubslotIdFromNodeId(m_NCS_NODE_ID_FROM_MDS_DEST(adest)); process_id = m_MDS_GET_PROCESS_ID_FROM_ADEST(adest); @@ -2404,6 +2411,7 @@ uint32_t mds_subtn_res_tbl_get(MDS_SVC_HDL svc_hdl, MDS_SVC_ID subscr_svc_id, if (subtn_res_info == NULL) { /* Subscription result entry doesn't exist for active result */ m_MDS_LOG_DBG("MDS:DB: Subscription Result not present"); + *adest = 0; m_MDS_LEAVE(); return NCSCC_RC_FAILURE; } else { diff --git a/src/mds/mds_dt_tcp.c b/src/mds/mds_dt_tcp.c index 0b45c07..1407eb1 100644 --- a/src/mds/mds_dt_tcp.c +++ b/src/mds/mds_dt_tcp.c @@ -104,6 +104,7 @@ uint32_t 
mds_mdtm_init_tcp(NODE_ID nodeid, uint32_t *mds_tcp_ref) mdtm_num_subscriptions = 0; mdtm_handle = 0; mdtm_global_frag_num_tcp = 0; + *mds_tcp_ref = 0; memset(&server_addr_un, 0, sizeof(struct sockaddr_un)); memset(&dhserver_addr_un, 0, sizeof(struct sockaddr_un)); diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c index 66f69cc..37745e7 100644 --- a/src/mds/mds_dt_tipc.c +++ b/src/mds/mds_dt_tipc.c @@ -182,6 +182,7 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid, uint32_t *mds_tipc_ref) socklen_t sz = sizeof(addr); memset(&tipc_cb, 0, sizeof(tipc_cb)); + *mds_tipc_ref = 0; /* Added to assist the shutdown bug */ mdtm_ref_hdl_list_hdr = NULL; diff --git a/src/mds/mds_log.cc b/src/mds/mds_log.cc index 67f2f46..94c94e6 100644 --- a/src/mds/mds_log.cc +++ b/src/mds/mds_log.cc @@ -94,6 +94,9 @@ bool MdsLog::Init() { char *token, *saveptr; char *pid_name = nullptr; + memset(app_name,
Re: [devel] [PATCH 1/1] amfnd: Only report OperState in SURestart recovery if su is under SMF maintenance campaign [#2476]
As per 3.11.1.4.2 Restrictions to Auto-Repair, AMFD has to disable the SU only when sumaintenance campaign is set. Ack from me, code review only. Thanks Praveen, On 05-Jun-17 10:35 AM, praveen malviya wrote: From surestart perspective: For SURestart recovery AMFD is not informed for disabled state because it would be spec deviation. While fixing spec deviation for surestart recovery, I had taken care of this by writing a new function su_send_suRestart_recovery_msg(). AMFND internally keeps su disabled to correctly execute the SU and comp FSM but does not inform AMFD. I am going to analyze it further from suMaintenance perspective. Thanks Praveen On 05-Jun-17 6:14 AM, minh chau wrote: Hi Alex, In legacy recovery, the SuRestart has not reported oper state to disabled and back to enabled when recovery completes. The state remains unchanged. In AMF spec, 3.11.1.2, it seems the states should be enabled and in-service during restart. For feature of su maintenance campaign, I think amfnd needs to report oper state as disabled for su restart, then the su will be manually repaired, but that oper state report is for the new feature only. thanks, Minh On 03/06/17 04:56, Alex Jones wrote: Hi Minh, In the legacy recovery, does the oper state change to disabled (and then enabled) ever get reported to amfd? Really, the question is, do both the NTF notifications that report disabled, and then enabled get generated? I seem to remember that they didn't, which is why I put this there. 
Alex On 06/02/2017 08:08 AM, Minh Chau wrote: NOTICE: This email was received from an EXTERNAL sender Patch keeps legacy behavior of SURestart recovery before saAmfSUMaintenanceCampaign feature --- src/amf/amfnd/err.cc | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc index 2abaf2007..e4cb9f08b 100644 --- a/src/amf/amfnd/err.cc +++ b/src/amf/amfnd/err.cc @@ -707,8 +707,14 @@ uint32_t avnd_err_rcvr_su_restart(AVND_CB *cb, AVND_SU *su, uint32_t rc = avnd_comp_oper_state_avd_sync(cb, failed_comp); if (NCSCC_RC_SUCCESS != rc) goto done; - avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name, - su->oper); + /* Keep SURestart recovery not to always report OperState to amfd + as legacy recovery. Only report OperState if SU is under SMF maintenance + campaign + */ + if (!su->suMaintenanceCampaign.empty()) { + avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name, + su->oper); + } set_suRestart_flag(su); -- 2.11.0 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! https://urldefense.proofpoint.com/v2/url?u=http-3A__sdm.link_slashdot&d=DwICAg&c=RoP1YumCXCgaWHvlZYR8PQcxBKCX5YTpkKY057SbK10&r=Lehk1PZKwfDQtYJXNyUKbPAqrw5O--SlPRAF9DIEps4&m=Q4kvIyr7bvemvKanM42H-wxdcMqTegXtaHjroZ116w8&s=Tda-67mquksw0rVQdwLNhr_iVG4mzi5bKP3Rv2Rt1dM&e= ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.sourceforge.net_lists_listinfo_opensaf-2Ddevel&d=DwICAg&c=RoP1YumCXCgaWHvlZYR8PQcxBKCX5YTpkKY057SbK10&r=Lehk1PZKwfDQtYJXNyUKbPAqrw5O--SlPRAF9DIEps4&m=Q4kvIyr7bvemvKanM42H-wxdcMqTegXtaHjroZ116w8&s=SkHfX55d6ZsjhrIkPpTOTfZzpmCgZOxconpjuP7gKMI&e= -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! 
http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfnd: Only report OperState in SURestart recovery if su is under SMF maintenance campaign [#2476]
From surestart perspective: For SURestart recovery AMFD is not informed for disabled state because it would be spec deviation. While fixing spec deviation for surestart recovery, I had taken care of this by writing a new function su_send_suRestart_recovery_msg(). AMFND internally keeps su disabled to correctly execute the SU and comp FSM but does not inform AMFD. I am going to analyze it further from suMaintenance perspective. Thanks Praveen On 05-Jun-17 6:14 AM, minh chau wrote: Hi Alex, In legacy recovery, the SuRestart has not reported oper state to disabled and back to enabled when recovery completes. The state remains unchanged. In AMF spec, 3.11.1.2, it seems the states should be enabled and in-service during restart. For feature of su maintenance campaign, I think amfnd needs to report oper state as disabled for su restart, then the su will be manually repaired, but that oper state report is for the new feature only. thanks, Minh On 03/06/17 04:56, Alex Jones wrote: Hi Minh, In the legacy recovery, does the oper state change to disabled (and then enabled) ever get reported to amfd? Really, the question is, do both the NTF notifications that report disabled, and then enabled get generated? I seem to remember that they didn't, which is why I put this there. 
Alex On 06/02/2017 08:08 AM, Minh Chau wrote: NOTICE: This email was received from an EXTERNAL sender Patch keeps legacy behavior of SURestart recovery before saAmfSUMaintenanceCampaign feature --- src/amf/amfnd/err.cc | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc index 2abaf2007..e4cb9f08b 100644 --- a/src/amf/amfnd/err.cc +++ b/src/amf/amfnd/err.cc @@ -707,8 +707,14 @@ uint32_t avnd_err_rcvr_su_restart(AVND_CB *cb, AVND_SU *su, uint32_t rc = avnd_comp_oper_state_avd_sync(cb, failed_comp); if (NCSCC_RC_SUCCESS != rc) goto done; - avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name, - su->oper); + /* Keep SURestart recovery not to always report OperState to amfd + as legacy recovery. Only report OperState if SU is under SMF maintenance + campaign + */ + if (!su->suMaintenanceCampaign.empty()) { + avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name, + su->oper); + } set_suRestart_flag(su); -- 2.11.0 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfd: do not assert unnecessarily [#2458]
Ack, code review only. Thanks, Praveen On 11-May-17 3:08 PM, Gary Lee wrote: > IMM APIs can fail if immnd finishes shutting down before amfd. > amfd should not unnecessarily assert and cause core dumps > to be created. > --- > src/amf/amfd/app.cc | 7 +-- > src/amf/amfd/comp.cc | 7 +-- > src/amf/amfd/sg.cc | 7 +-- > src/amf/amfd/si.cc | 7 +-- > src/amf/amfd/su.cc | 7 +-- > 5 files changed, 25 insertions(+), 10 deletions(-) > > diff --git a/src/amf/amfd/app.cc b/src/amf/amfd/app.cc > index 62ad34acb..424d82847 100644 > --- a/src/amf/amfd/app.cc > +++ b/src/amf/amfd/app.cc > @@ -470,8 +470,11 @@ SaAisErrorT avd_app_config_get(void) { > if (avd_si_config_get(app) != SA_AIS_OK) goto done2; > } > > - osafassert(rc == SA_AIS_ERR_NOT_EXIST); > - error = SA_AIS_OK; > + if (rc == SA_AIS_ERR_NOT_EXIST) { > +error = SA_AIS_OK; > + } else { > +LOG_ER("avd_app_config_get FAILED %u", rc); > + } > done2: > (void)immutil_saImmOmSearchFinalize(searchHandle); > done1: > diff --git a/src/amf/amfd/comp.cc b/src/amf/amfd/comp.cc > index 3e0dc5de1..e3f0f9051 100644 > --- a/src/amf/amfd/comp.cc > +++ b/src/amf/amfd/comp.cc > @@ -811,8 +811,11 @@ SaAisErrorT avd_comp_config_get(const std::string > &su_name, AVD_SU *su) { > goto done2; > } > > - osafassert(rc == SA_AIS_ERR_NOT_EXIST); > - error = SA_AIS_OK; > + if (rc == SA_AIS_ERR_NOT_EXIST) { > +error = SA_AIS_OK; > + } else { > +LOG_ER("avd_comp_config_get FAILED %u", rc); > + } > > done2: > (void)immutil_saImmOmSearchFinalize(searchHandle); > diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc > index 9b04a423c..79e8f5fd7 100644 > --- a/src/amf/amfd/sg.cc > +++ b/src/amf/amfd/sg.cc > @@ -499,8 +499,11 @@ SaAisErrorT avd_sg_config_get(const std::string &app_dn, > AVD_APP *app) { > } > } > > - osafassert(rc == SA_AIS_ERR_NOT_EXIST); > - error = SA_AIS_OK; > + if (rc == SA_AIS_ERR_NOT_EXIST) { > +error = SA_AIS_OK; > + } else { > +LOG_ER("avd_sg_config_get FAILED %u", rc); > + } > > done2: > 
(void)immutil_saImmOmSearchFinalize(searchHandle); > diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc > index 298188a84..51dd9a662 100644 > --- a/src/amf/amfd/si.cc > +++ b/src/amf/amfd/si.cc > @@ -705,8 +705,11 @@ SaAisErrorT avd_si_config_get(AVD_APP *app) { > if (avd_csi_config_get(si_str, si) != SA_AIS_OK) goto done2; > } > > - osafassert(rc == SA_AIS_ERR_NOT_EXIST); > - error = SA_AIS_OK; > + if (rc == SA_AIS_ERR_NOT_EXIST) { > +error = SA_AIS_OK; > + } else { > +LOG_ER("avd_si_config_get FAILED %u", rc); > + } > > done2: > (void)immutil_saImmOmSearchFinalize(searchHandle); > diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc > index fac1188b5..62c372822 100644 > --- a/src/amf/amfd/su.cc > +++ b/src/amf/amfd/su.cc > @@ -750,8 +750,11 @@ SaAisErrorT avd_su_config_get(const std::string > &sg_name, AVD_SG *sg) { > } > } > > - osafassert(rc == SA_AIS_ERR_NOT_EXIST); > - error = SA_AIS_OK; > + if (rc == SA_AIS_ERR_NOT_EXIST) { > +error = SA_AIS_OK; > + } else { > +LOG_ER("avd_su_config_get FAILED %u", rc); > + } > > done2: > (void)immutil_saImmOmSearchFinalize(searchHandle); > -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfd: only increment su_cnt_admin_oper for non-opensaf SUs [#2466]
Ack (not tested). Thanks Praveen On 24-May-17 6:21 AM, Gary Lee wrote: > Only increment su_cnt_admin_oper for non-opensaf SUs. Otherwise a nodegroup > admin operation may fail to finish if it is started while a node is joining. > > based on analysis from Minh / Praveen > --- > src/amf/amfd/sgproc.cc | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc > index cd95fe82c..20549808b 100644 > --- a/src/amf/amfd/sgproc.cc > +++ b/src/amf/amfd/sgproc.cc > @@ -262,6 +262,7 @@ npisu_done: > AVD_AVND *node = su->su_on_node; > if ((node->admin_node_pend_cbk.invocation != 0) || > ((node->admin_ng != nullptr) && > + (su->sg_of_su->sg_ncs_spec == false) && > (node->admin_ng->admin_ng_pend_cbk.invocation != 0))) { > node->su_cnt_admin_oper++; > TRACE("node:'%s', su_cnt_admin_oper:%u", node->name.c_str(), > -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down V2 [#2105]
Ack. Thanks Praveen On 19-May-17 12:48 PM, Minh Chau wrote: > When amfnd-payload responds susi assignment response just before both SC > go down, and that response message does not come to director. Therefore, > the status of that assignment could be seen as "modifying" in IMM. When > SC comes back, active amfd will be waiting for that response forever. > > Patch checks if a susi assignment response is sent but not-ack just before > both SC come down, amfnd-payload will buffer it in a way as a susi get > assigned during SC absence > --- > src/amf/amfnd/avnd_di.h | 2 +- > src/amf/amfnd/di.cc | 85 > +++-- > 2 files changed, 70 insertions(+), 17 deletions(-) > > diff --git a/src/amf/amfnd/avnd_di.h b/src/amf/amfnd/avnd_di.h > index 07222eb67..d7ccd68fd 100644 > --- a/src/amf/amfnd/avnd_di.h > +++ b/src/amf/amfnd/avnd_di.h > @@ -79,7 +79,7 @@ uint32_t avnd_di_pg_act_send(struct avnd_cb_tag *, const > std::string &, >AVSV_PG_TRACK_ACT, bool); > uint32_t avnd_di_msg_send(struct avnd_cb_tag *, AVND_MSG *); > void avnd_di_msg_ack_process(struct avnd_cb_tag *, uint32_t); > -void avnd_diq_del(struct avnd_cb_tag *); > +void avnd_diq_rec_check_buffered_msg(struct avnd_cb_tag *); > AVND_DND_MSG_LIST *avnd_diq_rec_add(struct avnd_cb_tag *cb, AVND_MSG *msg); > void avnd_diq_rec_del(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST *rec); > void avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb); > diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc > index e06b9260d..6f0a76cda 100644 > --- a/src/amf/amfnd/di.cc > +++ b/src/amf/amfnd/di.cc > @@ -698,8 +698,8 @@ uint32_t avnd_evt_mds_avd_dn_evh(AVND_CB *cb, AVND_EVT > *evt) { > } > } > } else { > -TRACE("Delete all pending messages to be sent to AMFD"); > -avnd_diq_del(cb); > +TRACE("Delete/Buffer pending messages to be sent to AMFD"); > +avnd_diq_rec_check_buffered_msg(cb); > } > > // check for pending messages FROM director > @@ -1271,9 +1271,15 @@ void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t > mid) { > } > > > / > - Name : 
avnd_diq_del > + Name : avnd_diq_rec_check_buffered_msg > + > + Description : The routine buffers messages that are waiting for ack and > will > + resend to AMFD when AMFD is up. > + All messages are deleted, except following messages to be > + buffered: > + - AVSV_N2D_INFO_SU_SI_ASSIGN_MSG > + - AVSV_N2D_OPERATION_STATE_MSG > > - Description : This routine clears the AvD msg list. > > Arguments : cb - ptr to the AvND control block > > @@ -1281,18 +1287,65 @@ void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t > mid) { > > Notes : None. > > **/ > -void avnd_diq_del(AVND_CB *cb) { > - AVND_DND_MSG_LIST *rec = 0; > - > - do { > -/* pop the record */ > -m_AVND_DIQ_REC_POP(cb, rec); > -if (!rec) break; > - > -/* delete the record */ > -avnd_diq_rec_del(cb, rec); > - } while (1); > - > +void avnd_diq_rec_check_buffered_msg(AVND_CB *cb) { > + if ((cb->dnd_list.head != nullptr)) { > +AVND_DND_MSG_LIST *rec = 0; > +bool found = true; > +while (found) { > + found = false; > + for (rec = cb->dnd_list.head; rec != nullptr;) { > +osafassert(rec->msg.type == AVND_MSG_AVD); > +m_AVND_DIQ_REC_POP(cb, rec); > +// Assignment response had been sent, but not ack because > +// last controller go down, reset msg_id and will be resent later > +if (rec->msg.info.avd->msg_type == AVSV_N2D_INFO_SU_SI_ASSIGN_MSG) { > + if (rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id != 0) { > +rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0; > +found = true; > +LOG_NO( > +"Found not-ack su_si_assign msg for SU:'%s', " > +"SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', " > +"error:'%u', msg_id:'%u'", > +osaf_extended_name_borrow(&rec->msg.info.avd->msg_info > + .n2d_su_si_assign.su_name), > +osaf_extended_name_borrow(&rec->msg.info.avd->msg_info > + .n2d_su_si_assign.si_name), > +rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state, > +rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act, > +rec->msg.info.avd->msg_info.n2d_su_si_assign > +.single_csi, > 
+rec->msg.info.avd->msg_info.n2d_su_si_assign.error, > +rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id); > + } > + m_AVND_DIQ_REC_PUSH(cb, rec); > +
Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down [#2105]
Hi Minh, I had analysed the traces you attached. Based on that I am able to test that. When MDS returns success patch works fine. Minor correction is needed when MDS return failure. I think susi message should be kept independent of no. of tries in avnd_diq_del(). Thanks Praveen On 18-May-17 12:41 PM, minh chau wrote: > Hi Praveen, > > Some comments in line with [Minh] > > thanks, > Minh > > On 18/05/17 14:54, praveen malviya wrote: >> Hi Minh, >> >> In the description of the ticket there is a log which is : >> " >> Oct 7 18:31:41 SYSTEST-PLD-1 osafamfnd[12467]: NO Assigned >> 'safSi=TestApp_SI4,safApp=TestApp_TwoN' ACTIVE to >> 'safSu=TestApp_SU1,safSg=TestApp_SG1,safApp=TestApp_TwoN' >> Oct 7 18:31:41 SYSTEST-PLD-1 osafamfnd[12467]: NO >> avnd_di_susi_resp_send() deferred as AMF director is offline >> " >> Last line in above log means AMFND was sending the message when it new >> about SC absence state. I think this issue is already fixed during >> #1725 and this published patch is not required. Why? After led set >> message amfnd will anyway send this message. > [Minh] I have reproduced the problem and attached to ticket for your > reference. > Some outlined logs: > The step is stopping SC1, SC2. 
> In SC2, amfd sent susi assignment req to amfnd-PL3 > May 18 16:32:03.633226 osafamfd [245:245:src/amf/amfd/sgproc.cc:2444] >> > avd_sg_su_si_mod_snd: 'safSu=SU3,safSg=AmfDemoTwon,safApp=AmfDemoTwon', > state 1 > > In PL3, amfnd completed this susi req, and sent susi resp successfully > but it did not reach to amfd-SC2 > May 18 16:32:03.641156 osafamfnd [186:186:src/amf/amfnd/su.cc:0373] >> > avnd_evt_avd_info_su_si_assign_evh: > 'safSu=SU3,safSg=AmfDemoTwon,safApp=AmfDemoTwon' > May 18 16:32:03.641744 osafamfnd [186:186:src/amf/amfnd/di.cc:0866] >> > avnd_di_susi_resp_send: Sending Resp > su=safSu=SU3,safSg=AmfDemoTwon,safApp=AmfDemoTwon, > si=safSi=AmfDemoTwon,safApp=AmfDemoTwon, curr_state=1, prv_state=2 > > amfnd-PL3 is notified NCSMDS_DOWN, amfnd deleted all pending msg waiting > for ack > May 18 16:32:05.568471 osafamfnd [186:186:src/amf/amfnd/di.cc:0629] >> > avnd_evt_mds_avd_dn_evh > May 18 16:32:05.568492 osafamfnd [186:186:src/amf/amfnd/di.cc:0651] WA > AMF director unexpectedly crashed > May 18 16:32:05.568495 osafamfnd [186:186:src/amf/amfnd/di.cc:0701] TR > Delete all pending messages to be sent to AMFD > May 18 16:32:05.568498 osafamfnd [186:186:src/amf/amfnd/di.cc:1353] >> > avnd_diq_rec_del > May 18 16:32:05.568503 osafamfnd [186:186:src/amf/amfnd/di.cc:1369] << > avnd_diq_rec_del > > When SC restarts, amfd-SC1 thinks this assignment being in progress, so > it waits and waits forever > May 18 16:32:28.954967 osafamfd [257:257:src/amf/amfd/su.cc:2588] >> > any_susi_fsm_in: SU:'safSu=SU3,safSg=AmfDemoTwon,safApp=AmfDemoTwon', > check_fsm:5 > May 18 16:32:28.954975 osafamfd [257:257:src/amf/amfd/su.cc:2593] TR > SUSI:'safSu=SU3,safSg=AmfDemoTwon,safApp=AmfDemoTwon,safSi=AmfDemoTwon,safApp=AmfDemoTwon', > > fsm:'5' > May 18 16:32:28.954982 osafamfd [257:257:src/amf/amfd/su.cc:2596] TR Found > May 18 16:32:28.954989 osafamfd [257:257:src/amf/amfd/su.cc:2599] << > any_susi_fsm_in > May 18 16:32:28.954996 osafamfd [257:257:src/amf/amfd/sg.cc:2340] << > 
any_assignment_in_progress > > This problem is very close to the one you mentioned and fixed in #1725. > In #1725, amfnd surely knows amfd down, so amfnd buffers msg. In #2105, > amfnd sends msg out just before amfnd detects amfd being down. >> >> The logs that I have attached can be ignored. I was simulating the bug >> on different assumptions. >> >> One question regarding the patch: >> If the goal is to fix the issue when the message is being sent and >> system has become SC-less. In this situation, then avnd_mds_send() >> will return, most probably, a failure as MDS will not find the >> destination. In mds failure case, rec->no_retries will not be >> incremented and will remain zero. Now AMFND will process down of SC >> and it will call avnd_diq_del(). In this function, since no_retries is >> zero for this message(first message), the message will be deleted. >> > [Minh]: Thanks, it's good to handle failure code returned from MDS. I > will update the patch >> >> Thanks, >> Praveen >> >> >> On 18-May-17 9:14 AM, minh chau wrote: >>> Hi Praveen, >>> >>> Do you have any idea why @is_avd_down was false that made amfnd to >>> send susi_resp at 12:37:20
Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down [#2105]
Hi Minh, In the description of the ticket there is a log which is : " Oct 7 18:31:41 SYSTEST-PLD-1 osafamfnd[12467]: NO Assigned 'safSi=TestApp_SI4,safApp=TestApp_TwoN' ACTIVE to 'safSu=TestApp_SU1,safSg=TestApp_SG1,safApp=TestApp_TwoN' Oct 7 18:31:41 SYSTEST-PLD-1 osafamfnd[12467]: NO avnd_di_susi_resp_send() deferred as AMF director is offline " Last line in above log means AMFND was sending the message when it knew about SC absence state. I think this issue is already fixed during #1725 and this published patch is not required. Why? After led set message amfnd will anyway send this message. The logs that I have attached can be ignored. I was simulating the bug on different assumptions. One question regarding the patch: If the goal is to fix the issue when the message is being sent and system has become SC-less, then in this situation avnd_mds_send() will return, most probably, a failure as MDS will not find the destination. In mds failure case, rec->no_retries will not be incremented and will remain zero. Now AMFND will process down of SC and it will call avnd_diq_del(). In this function, since no_retries is zero for this message (first message), the message will be deleted. Thanks, Praveen On 18-May-17 9:14 AM, minh chau wrote: > Hi Praveen, > > Do you have any idea why @is_avd_down was false that made amfnd to send > susi_resp at 12:37:20.453974? > It should be true by the end of avnd_evt_mds_avd_dn_evh() at > 12:37:16.741518, is it right? > > Thanks, > Minh > On 17/05/17 21:31, minh chau wrote: >> Hi Praveen, >> >> Thanks for looking at the issue.
>> Here is what I am observing >> >> amfnd-PL3 received NCSMDS_DOWN indicating no active amfd >> >> May 17 12:37:16.741308 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0629] >> >> avnd_evt_mds_avd_dn_evh >> May 17 12:37:16.741342 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0651] >> WA AMF director unexpectedly crashed >> May 17 12:37:16.741354 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0701] >> TR Delete all pending messages to be sent to AMFD >> May 17 12:37:16.741379 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0709] >> NO Checking 'safSu=PL-3,safSg=NoRed,safApp=OpenSAF' for pending messages >> May 17 12:37:16.741405 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0709] >> NO Checking 'safSu=SU1,safSg=AmfDemo,safApp=AmfDemo1' for pending >> messages >> May 17 12:37:16.741430 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0709] >> NO Checking 'safSu=SU2,safSg=AmfDemo,safApp=AmfDemo1' for pending >> messages >> May 17 12:37:16.741505 osafamfnd [8141:8141:src/amf/amfnd/tmr.cc:0083] >> TR SC absence timer started >> May 17 12:37:16.741518 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0742] >> << avnd_evt_mds_avd_dn_evh >> >> But a bit later, susi got assigned, and amfnd-PL3 did send this susi >> response (it should not send out and buffer it, since the @is_avd_down >> should be true) >> >> May 17 12:37:20.453974 osafamfnd [8141:8141:src/amf/amfnd/di.cc:0866] >> >> avnd_di_susi_resp_send: Sending Resp >> su=safSu=SU1,safSg=AmfDemo,safApp=AmfDemo1, >> si=safSi=AmfDemo,safApp=AmfDemo1, curr_state=3, prv_state=1 >> ... 
>> May 17 12:37:20.454083 osafamfnd [8141:8141:src/amf/amfnd/mds.cc:1482] >> >> avnd_mds_send: Msg type '1' >> May 17 12:37:20.454244 osafamfnd [8141:8141:src/amf/amfnd/mds.cc:1537] >> ER ncsmds_api for 0 FAILED, dest=0 >> >> When SC1 restarted, amfd received the very first messages from PL3 >> starting with msg_id=1 (it should be starting from 0) >> >> May 17 12:37:28.398633 osafamfd >> [7686:7686:src/amf/amfd/ndproc.cc:0330] NO Receive message with event >> type:12, msg_type:31, from node:2030f, msg_id:1 >> May 17 12:37:28.413018 osafamfd [7686:7686:src/amf/amfd/ndfsm.cc:0334] >> NO Received node_up_msg from all nodes >> May 17 12:37:28.413069 osafamfd [7686:7686:src/amf/amfd/ndfsm.cc:0254] >> NO Received node_up from 2030f: msg_id 2 >> >> Looks to me something should not happen inside >> avnd_evt_mds_avd_dn_evh(). In this avnd_evt_mds_avd_dn_evh(), >> @is_avd_down should be true, the msg counter should be reset to 0, but >> I do see the SC absence timer started. I couldn't figure how it >> happened for now >> >> Thanks, >> Minh >> >> On 17/05/17 20:03, praveen malviya wrote: >>> What I see is avnd_diq_del() is called as soon as system becomes >>> headless. This will delete all pending messages. But when component >>> will respond during SCs absence a new message will be generated and >>> buffered.
Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down [#2105]
What I see is avnd_diq_del() is called as soon as system becomes headless. This will delete all pending messages. But when component will respond during SCs absence a new message will be generated and buffered. For node_up AMFD will ack the message, but amfnd calls avnd_diq_rec_del() (not avnd_diq_del()) in avnd_di_msg_ack_process(). We need to call avnd_diq_del() in ack message so that msg_id gets updated. Further looking into it.. Thanks. Praveen On 17-May-17 1:50 PM, praveen malviya wrote: > Hi Minh, > > While testing this, I am observing that amfd is dropping the assignment > message because of message id mismatch: > May 17 12:37:39.522117 osafamfd [7686:7686:src/amf/amfd/sgproc.cc:1171] > >> avd_su_si_assign_evh: id:1, node:2030f, act:5, > 'safSu=SU1,safSg=AmfDemo,safApp=AmfDemo1', '', ha:3, err:1, single:0 > > > May 17 12:37:39.522404 osafamfd [7686:7686:src/amf/amfd/ndproc.cc:0075] > WA avd_msg_sanity_chk: invalid msg id 1, msg type 5, from 2030f should be 3 > May 17 12:37:39.522418 osafamfd [7686:7686:src/amf/amfd/sgproc.cc:1777] > << avd_su_si_assign_evh > > I am also looking into this. For your reference I had attached amfd and > amfnd traces from SC-1 and PL-3 respectively in the ticket. > I am working with one controller and one payload. > > > Thanks > Praveen > > On 15-May-17 1:06 PM, Minh Chau wrote: >> When amfnd-payload responds susi assignment response just before both SC >> go down, and that response message does not come to director. Therefore, >> the status of that assignment could be seen as "modifying" in IMM. When >> SC comes back, active amfd will be waiting for that response forever. 
>> >> Patch checks if a susi assignment response is sent but not-ack just before >> both SC come down, amfnd-payload will buffer it in a way as a susi get >> assigned during SC absence >> --- >>src/amf/amfnd/di.cc | 53 >> + >>1 file changed, 45 insertions(+), 8 deletions(-) >> >> diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc >> index e06b9260d..3776a09dc 100644 >> --- a/src/amf/amfnd/di.cc >> +++ b/src/amf/amfnd/di.cc >> @@ -1282,16 +1282,53 @@ void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t >> mid) { >> Notes : None. >> >> **/ >>void avnd_diq_del(AVND_CB *cb) { >> - AVND_DND_MSG_LIST *rec = 0; >> >> - do { >> -/* pop the record */ >> -m_AVND_DIQ_REC_POP(cb, rec); >> -if (!rec) break; >> + if ((cb->dnd_list.head != nullptr)) { >> +AVND_DND_MSG_LIST *rec = 0; >> +bool found = true; >> +while (found) { >> + found = false; >> + for (rec = cb->dnd_list.head; rec != nullptr; >> + rec = rec->next) { >> +osafassert(rec->msg.type == AVND_MSG_AVD); >> +// delete all pending messages that haven't been sent out >> +if (rec->no_retries == 0) { >> + m_AVND_DIQ_REC_POP(cb, rec); >> + avnd_diq_rec_del(cb, rec); >> + break; >> +} else { >> + // Assignment response had been sent, but not ack because last >> + // controller go down, reset msg_id and will be resent later >> + if (rec->msg.info.avd->msg_type == >> AVSV_N2D_INFO_SU_SI_ASSIGN_MSG) { >> +if (rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id != 0) { >> + rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0; >> + found = true; >> + LOG_NO( >> + "Found not-ack su_si_assign msg for SU:'%s', " >> + "SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', " >> + "error:'%u', msg_id:'%u'", >> + osaf_extended_name_borrow(&rec->msg.info.avd->msg_info >> + .n2d_su_si_assign.su_name), >> + osaf_extended_name_borrow(&rec->msg.info.avd->msg_info >> + .n2d_su_si_assign.si_name), >> + rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state, >> + rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act, >> + 
rec->msg.info.avd->msg_info.n2d_su_si_assign >> + .single_csi, >> +
Re: [devel] [PATCH 1/1] amfd: Check IMM service status before use IMM call [#2416]
Ack. Thanks Praveen On 17-May-17 3:21 AM, Minh Chau wrote: > When Opensaf 2N switchover, amfd tries to update some attributes > to IMM. But this time, IMM is not available since Opensaf 2N SI > is in QUIESCED. > > SC-1 osafamfnd[510]: NO Assigned 'safSi=SC-2N,safApp=OpenSAF' QUIESCED to > 'safSu=SC-1,safSg=2N,safApp=OpenSAF' > SC-1 osafamfd[496]: WA saImmOiRtObjectUpdate of > 'safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-2N,safApp=OpenSAF' > osafAmfSISUFsmState failed with 5 > SC-1 osafimmnd[441]: WA ERR_BAD_HANDLE: Handle use is blocked by pending > reply on syncronous call > > This patch corrects the problem by checking IMM service status > before call avd_saImmOiRtXXX_sync. The problem does not appear > previously because IMM update is queued and FiFo::execute() does > check IMM status before executing a job. A check of @avail_state_avd > against SA_AMF_HA_ACTIVE is not enough in avd_saImmOiRtXXX_sync. > This patch does check IMM status in avd_saImmOiRtXXX_sync as similar > as FiFo::execute(). 
> --- > src/amf/amfd/imm.cc | 35 --- > 1 file changed, 24 insertions(+), 11 deletions(-) > > diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc > index 65df73f73..7b1aa333e 100644 > --- a/src/amf/amfd/imm.cc > +++ b/src/amf/amfd/imm.cc > @@ -131,22 +131,32 @@ static char *StrDup(const char *s) { > // > Job::~Job() {} > > -// > -bool ImmJob::isRunnable(const AVD_CL_CB *cb) { > +// TODO: Make isImmServiceReady as static to limit its scope > +// This function should belong to AVD_CB class as a method > +static bool isImmServiceReady(const AVD_CL_CB *cb) { > TRACE_ENTER(); > bool rc = true; > + > + if (avd_cb->active_services_exist == false) { > + TRACE("No active service"); > + rc = false; > + } > if ((!avd_cb->is_implementer) && > (avd_cb->avail_state_avd == SA_AMF_HA_STANDBY)) { > rc = false; > } > - > if (avd_cb->avd_imm_status == AVD_IMM_INIT_ONGOING) { > TRACE("Already IMM init is going, try again after sometime"); > rc = false; > } > - TRACE_LEAVE(); > + TRACE_LEAVE2("%u:", rc); > return rc; > } > + > +// > +bool ImmJob::isRunnable(const AVD_CL_CB *cb) { > + return isImmServiceReady(cb); > +} > // > AvdJobDequeueResultT ImmObjCreate::exec(const AVD_CL_CB *cb) { > SaAisErrorT rc; > @@ -1707,10 +1717,11 @@ SaAisErrorT avd_saImmOiRtObjectUpdate_sync( > SaImmAttrModificationT_2 attrMod; > const SaImmAttrModificationT_2 *attrMods[] = {&attrMod, nullptr}; > SaImmAttrValueT attrValues[] = {value}; > - > const std::string attribute_name(attributeName); > + bool isImmReady = isImmServiceReady(avd_cb); > + > TRACE_ENTER2("'%s' %s", dn.c_str(), attributeName); > - if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { > + if (isImmReady == true) { > attrMod.modType = modifyType; > attrMod.modAttr.attrName = attributeName; > attrMod.modAttr.attrValuesNumber = 1; > @@ -1723,7 +1734,7 @@ SaAisErrorT avd_saImmOiRtObjectUpdate_sync( >attributeName, rc); > } > > - if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) { > + if (rc != SA_AIS_OK || isImmReady == 
false) { > // Now it will be updated through job queue. > avd_saImmOiRtObjectUpdate(dn, attribute_name, attrValueType, value); > } > @@ -1875,8 +1886,9 @@ void avd_saImmOiRtObjectCreate_sync(const std::string > &className, > TRACE_ENTER2("%s %s", className.c_str(), parentName.c_str()); > > SaAisErrorT rc = SA_AIS_OK; > + bool isImmReady = isImmServiceReady(avd_cb); > > - if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { > + if (isImmReady == true) { > const SaNameTWrapper parent_name(parentName); > rc = saImmOiRtObjectCreate_2(avd_cb->immOiHandle, > const_cast(className.c_str()), > @@ -1887,7 +1899,7 @@ void avd_saImmOiRtObjectCreate_sync(const std::string > &className, > } > } > > - if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) { > + if (rc != SA_AIS_OK || isImmReady == false) { > // Now it will be updated through job queue. > avd_saImmOiRtObjectCreate(className, parentName, attrValues); > } > @@ -1930,14 +1942,15 @@ void avd_saImmOiRtObjectCreate(const std::string > &className, > void avd_saImmOiRtObjectDelete_sync(const std::string &dn) { > TRACE_ENTER2("%s", dn.c_str()); > SaAisErrorT rc = SA_AIS_OK; > + bool isImmReady = isImmServiceReady(avd_cb); > > - if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { > + if (isImmReady == true) { > rc = saImmOiRtObjectDelete_o3(avd_cb->immOiHandle, dn.c_str()); > if (rc != SA_AIS_OK) { > LOG_WA("saImmOiRtObjectDelete_o3 of '%s' failed with %u", dn.c_str(), > rc); > } > } > - if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) { > + if (rc != SA_AIS_OK || isImmReady == false) { > // Now it will be updated through job queue. > avd_s
Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down [#2105]
Hi Minh, While testing this, I am observing that amfd is dropping the assignment message because of message id mismatch: May 17 12:37:39.522117 osafamfd [7686:7686:src/amf/amfd/sgproc.cc:1171] >> avd_su_si_assign_evh: id:1, node:2030f, act:5, 'safSu=SU1,safSg=AmfDemo,safApp=AmfDemo1', '', ha:3, err:1, single:0 May 17 12:37:39.522404 osafamfd [7686:7686:src/amf/amfd/ndproc.cc:0075] WA avd_msg_sanity_chk: invalid msg id 1, msg type 5, from 2030f should be 3 May 17 12:37:39.522418 osafamfd [7686:7686:src/amf/amfd/sgproc.cc:1777] << avd_su_si_assign_evh I am also looking into this. For your reference I had attached amfd and amfnd traces from SC-1 and PL-3 respectively in the ticket. I am working with one controller and one payload. Thanks Praveen On 15-May-17 1:06 PM, Minh Chau wrote: > When amfnd-payload responds susi assignment response just before both SC > go down, and that response message does not come to director. Therefore, > the status of that assignment could be seen as "modifying" in IMM. When > SC comes back, active amfd will be waiting for that response forever. > > Patch checks if a susi assignment response is sent but not-ack just before > both SC come down, amfnd-payload will buffer it in a way as a susi get > assigned during SC absence > --- > src/amf/amfnd/di.cc | 53 > + > 1 file changed, 45 insertions(+), 8 deletions(-) > > diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc > index e06b9260d..3776a09dc 100644 > --- a/src/amf/amfnd/di.cc > +++ b/src/amf/amfnd/di.cc > @@ -1282,16 +1282,53 @@ void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t > mid) { > Notes : None. 
> > **/ > void avnd_diq_del(AVND_CB *cb) { > - AVND_DND_MSG_LIST *rec = 0; > > - do { > -/* pop the record */ > -m_AVND_DIQ_REC_POP(cb, rec); > -if (!rec) break; > + if ((cb->dnd_list.head != nullptr)) { > +AVND_DND_MSG_LIST *rec = 0; > +bool found = true; > +while (found) { > + found = false; > + for (rec = cb->dnd_list.head; rec != nullptr; > + rec = rec->next) { > +osafassert(rec->msg.type == AVND_MSG_AVD); > +// delete all pending messages that haven't been sent out > +if (rec->no_retries == 0) { > + m_AVND_DIQ_REC_POP(cb, rec); > + avnd_diq_rec_del(cb, rec); > + break; > +} else { > + // Assignment response had been sent, but not ack because last > + // controller go down, reset msg_id and will be resent later > + if (rec->msg.info.avd->msg_type == AVSV_N2D_INFO_SU_SI_ASSIGN_MSG) > { > +if (rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id != 0) { > + rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0; > + found = true; > + LOG_NO( > + "Found not-ack su_si_assign msg for SU:'%s', " > + "SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', " > + "error:'%u', msg_id:'%u'", > + osaf_extended_name_borrow(&rec->msg.info.avd->msg_info > + .n2d_su_si_assign.su_name), > + osaf_extended_name_borrow(&rec->msg.info.avd->msg_info > + .n2d_su_si_assign.si_name), > + rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state, > + rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act, > + rec->msg.info.avd->msg_info.n2d_su_si_assign > + .single_csi, > + rec->msg.info.avd->msg_info.n2d_su_si_assign.error, > + rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id); > +} > + } else { > +// delete other messages for now > +m_AVND_DIQ_REC_POP(cb, rec); > +avnd_diq_rec_del(cb, rec); > +break; > + } > +} > > -/* delete the record */ > -avnd_diq_rec_del(cb, rec); > - } while (1); > + } > +} > + } > > return; > } > -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! 
http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfd: Check IMM service status before use IMM call [#2416]
Hi Minh, Is this reproducible all the time? I performed 4 switchovers on default branch but did not observe it. Thanks Praveen On 17-May-17 3:21 AM, Minh Chau wrote: > When Opensaf 2N switchover, amfd tries to update some attributes > to IMM. But this time, IMM is not available since Opensaf 2N SI > is in QUIESCED. > > SC-1 osafamfnd[510]: NO Assigned 'safSi=SC-2N,safApp=OpenSAF' QUIESCED to > 'safSu=SC-1,safSg=2N,safApp=OpenSAF' > SC-1 osafamfd[496]: WA saImmOiRtObjectUpdate of > 'safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-2N,safApp=OpenSAF' > osafAmfSISUFsmState failed with 5 > SC-1 osafimmnd[441]: WA ERR_BAD_HANDLE: Handle use is blocked by pending > reply on syncronous call > > This patch corrects the problem by checking IMM service status > before call avd_saImmOiRtXXX_sync. The problem does not appear > previously because IMM update is queued and FiFo::execute() does > check IMM status before executing a job. A check of @avail_state_avd > against SA_AMF_HA_ACTIVE is not enough in avd_saImmOiRtXXX_sync. > This patch does check IMM status in avd_saImmOiRtXXX_sync as similar > as FiFo::execute().
> --- > src/amf/amfd/imm.cc | 35 --- > 1 file changed, 24 insertions(+), 11 deletions(-) > > diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc > index 65df73f73..7b1aa333e 100644 > --- a/src/amf/amfd/imm.cc > +++ b/src/amf/amfd/imm.cc > @@ -131,22 +131,32 @@ static char *StrDup(const char *s) { > // > Job::~Job() {} > > -// > -bool ImmJob::isRunnable(const AVD_CL_CB *cb) { > +// TODO: Make isImmServiceReady as static to limit its scope > +// This function should belong to AVD_CB class as a method > +static bool isImmServiceReady(const AVD_CL_CB *cb) { > TRACE_ENTER(); > bool rc = true; > + > + if (avd_cb->active_services_exist == false) { > + TRACE("No active service"); > + rc = false; > + } > if ((!avd_cb->is_implementer) && > (avd_cb->avail_state_avd == SA_AMF_HA_STANDBY)) { > rc = false; > } > - > if (avd_cb->avd_imm_status == AVD_IMM_INIT_ONGOING) { > TRACE("Already IMM init is going, try again after sometime"); > rc = false; > } > - TRACE_LEAVE(); > + TRACE_LEAVE2("%u:", rc); > return rc; > } > + > +// > +bool ImmJob::isRunnable(const AVD_CL_CB *cb) { > + return isImmServiceReady(cb); > +} > // > AvdJobDequeueResultT ImmObjCreate::exec(const AVD_CL_CB *cb) { > SaAisErrorT rc; > @@ -1707,10 +1717,11 @@ SaAisErrorT avd_saImmOiRtObjectUpdate_sync( > SaImmAttrModificationT_2 attrMod; > const SaImmAttrModificationT_2 *attrMods[] = {&attrMod, nullptr}; > SaImmAttrValueT attrValues[] = {value}; > - > const std::string attribute_name(attributeName); > + bool isImmReady = isImmServiceReady(avd_cb); > + > TRACE_ENTER2("'%s' %s", dn.c_str(), attributeName); > - if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { > + if (isImmReady == true) { > attrMod.modType = modifyType; > attrMod.modAttr.attrName = attributeName; > attrMod.modAttr.attrValuesNumber = 1; > @@ -1723,7 +1734,7 @@ SaAisErrorT avd_saImmOiRtObjectUpdate_sync( >attributeName, rc); > } > > - if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) { > + if (rc != SA_AIS_OK || isImmReady == 
false) { > // Now it will be updated through job queue. > avd_saImmOiRtObjectUpdate(dn, attribute_name, attrValueType, value); > } > @@ -1875,8 +1886,9 @@ void avd_saImmOiRtObjectCreate_sync(const std::string > &className, > TRACE_ENTER2("%s %s", className.c_str(), parentName.c_str()); > > SaAisErrorT rc = SA_AIS_OK; > + bool isImmReady = isImmServiceReady(avd_cb); > > - if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { > + if (isImmReady == true) { > const SaNameTWrapper parent_name(parentName); > rc = saImmOiRtObjectCreate_2(avd_cb->immOiHandle, > const_cast(className.c_str()), > @@ -1887,7 +1899,7 @@ void avd_saImmOiRtObjectCreate_sync(const std::string > &className, > } > } > > - if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) { > + if (rc != SA_AIS_OK || isImmReady == false) { > // Now it will be updated through job queue. > avd_saImmOiRtObjectCreate(className, parentName, attrValues); > } > @@ -1930,14 +1942,15 @@ void avd_saImmOiRtObjectCreate(const std::string > &className, > void avd_saImmOiRtObjectDelete_sync(const std::string &dn) { > TRACE_ENTER2("%s", dn.c_str()); > SaAisErrorT rc = SA_AIS_OK; > + bool isImmReady = isImmServiceReady(avd_cb); > > - if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { > + if (isImmReady == true) { > rc = saImmOiRtObjectDelete_o3(avd_cb->immOiHandle, dn.c_str()); > if (rc != SA_AIS_OK) { > LOG_WA("saImmOiRtObjectDelete_o3 of '%s' failed with %u", dn.c_str(), > rc); > } > } > - if (rc != SA_AIS_OK || avd_cb->avail_state_avd != SA_AMF_HA_ACTIVE) { > + if (rc
Re: [devel] [PATCH 1/1] amf: send oper_state when NCS SUs already instantiated [#2443]
Hi Long, This check is very generic. During su restart cases, a PI SU having NPI components will send unnecessary enabled events to AMFD. When AMFD will receive this events it will try to assign this SU and can lead to assignments in other than 2N red models cases. I think check should be moved to the event handler of presence state message. Attached is the patch based on this idea. What do you think? Thanks Praveen On 28-Apr-17 9:42 AM, Long H Buu Nguyen wrote: --- src/amf/amfnd/susm.cc | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc index 52af63b83..04ced426d 100644 --- a/src/amf/amfnd/susm.cc +++ b/src/amf/amfnd/susm.cc @@ -1608,6 +1608,16 @@ uint32_t avnd_su_pres_fsm_run(AVND_CB *cb, AVND_SU *su, AVND_COMP *comp, /* process state change */ if (prv_st != final_st) rc = avnd_su_pres_st_chng_prc(cb, su, prv_st, final_st); + else { +// If SU has been already instantiated, inform amfd +if (SA_AMF_PRESENCE_INSTANTIATED == final_st && +su_all_pi_comps_instantiated(su) == true) { + if (m_AVND_SU_OPER_STATE_IS_ENABLED(su)) { +TRACE("SU oper state is enabled"); +rc = avnd_di_oper_send(cb, su, 0); + } +} + } done: TRACE_LEAVE2("%u", rc); diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc index 04ced42..18b8fc8 100644 --- a/src/amf/amfnd/susm.cc +++ b/src/amf/amfnd/susm.cc @@ -1608,16 +1608,6 @@ uint32_t avnd_su_pres_fsm_run(AVND_CB *cb, AVND_SU *su, AVND_COMP *comp, /* process state change */ if (prv_st != final_st) rc = avnd_su_pres_st_chng_prc(cb, su, prv_st, final_st); - else { -// If SU has been already instantiated, inform amfd -if (SA_AMF_PRESENCE_INSTANTIATED == final_st && -su_all_pi_comps_instantiated(su) == true) { - if (m_AVND_SU_OPER_STATE_IS_ENABLED(su)) { -TRACE("SU oper state is enabled"); -rc = avnd_di_oper_send(cb, su, 0); - } -} - } done: TRACE_LEAVE2("%u", rc); @@ -4133,7 +4123,16 @@ uint32_t avnd_evt_ir_evh(struct avnd_cb_tag *cb, struct avnd_evt_tag *evt) { } TRACE("SU instantiation for PI 
SUs, running the SU presence state FSM:'%s'", su->name.c_str()); -rc = avnd_su_pres_fsm_run(cb, su, 0, AVND_SU_PRES_FSM_EV_INST); +// If SU has been already instantiated, inform amfd +if ((cb->led_state == AVND_LED_STATE_RED) && +(su->pres == SA_AMF_PRESENCE_INSTANTIATED) && +(su_all_pi_comps_instantiated(su) == true) && +(m_AVND_SU_OPER_STATE_IS_ENABLED(su))) { +TRACE("SU oper state is enabled and pres state is instantiated."); +rc = avnd_di_oper_send(cb, su, 0); +} else { +rc = avnd_su_pres_fsm_run(cb, su, 0, AVND_SU_PRES_FSM_EV_INST); +} } else { if (m_AVND_SU_IS_REG_FAILED(su)) { /* The SU configuration is bad, we cannot do much other transition to -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfnd: Buffered not-ack susi assignment response after both SC go down [#2105]
Hi Minh, I am reviewing this patch. Thanks, Praveen On 15-May-17 1:06 PM, Minh Chau wrote: > When amfnd-payload responds susi assignment response just before both SC > go down, and that response message does not come to director. Therefore, > the status of that assignment could be seen as "modifying" in IMM. When > SC comes back, active amfd will be waiting for that response forever. > > Patch checks if a susi assignment response is sent but not-ack just before > both SC come down, amfnd-payload will buffer it in a way as a susi get > assigned during SC absence > --- > src/amf/amfnd/di.cc | 53 > + > 1 file changed, 45 insertions(+), 8 deletions(-) > > diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc > index e06b9260d..3776a09dc 100644 > --- a/src/amf/amfnd/di.cc > +++ b/src/amf/amfnd/di.cc > @@ -1282,16 +1282,53 @@ void avnd_di_msg_ack_process(AVND_CB *cb, uint32_t > mid) { > Notes : None. > > **/ > void avnd_diq_del(AVND_CB *cb) { > - AVND_DND_MSG_LIST *rec = 0; > > - do { > -/* pop the record */ > -m_AVND_DIQ_REC_POP(cb, rec); > -if (!rec) break; > + if ((cb->dnd_list.head != nullptr)) { > +AVND_DND_MSG_LIST *rec = 0; > +bool found = true; > +while (found) { > + found = false; > + for (rec = cb->dnd_list.head; rec != nullptr; > + rec = rec->next) { > +osafassert(rec->msg.type == AVND_MSG_AVD); > +// delete all pending messages that haven't been sent out > +if (rec->no_retries == 0) { > + m_AVND_DIQ_REC_POP(cb, rec); > + avnd_diq_rec_del(cb, rec); > + break; > +} else { > + // Assignment response had been sent, but not ack because last > + // controller go down, reset msg_id and will be resent later > + if (rec->msg.info.avd->msg_type == AVSV_N2D_INFO_SU_SI_ASSIGN_MSG) > { > +if (rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id != 0) { > + rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0; > + found = true; > + LOG_NO( > + "Found not-ack su_si_assign msg for SU:'%s', " > + "SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', " > + "error:'%u', 
msg_id:'%u'", > + osaf_extended_name_borrow(&rec->msg.info.avd->msg_info > + .n2d_su_si_assign.su_name), > + osaf_extended_name_borrow(&rec->msg.info.avd->msg_info > + .n2d_su_si_assign.si_name), > + rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state, > + rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act, > + rec->msg.info.avd->msg_info.n2d_su_si_assign > + .single_csi, > + rec->msg.info.avd->msg_info.n2d_su_si_assign.error, > + rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id); > +} > + } else { > +// delete other messages for now > +m_AVND_DIQ_REC_POP(cb, rec); > +avnd_diq_rec_del(cb, rec); > +break; > + } > +} > > -/* delete the record */ > -avnd_diq_rec_del(cb, rec); > - } while (1); > + } > +} > + } > > return; > } > -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 0/1] Review Request for amf: send oper_state when NCS SUs already instantiated [#2443]
Hi Long, I will review it by tomorrow. Thanks Praveen On 15-May-17 8:55 AM, Long Nguyen wrote: > Dear maintainers, > > Can you please help to review the patch? > > Thanks so much, > Long Nguyen. > > On 5/9/2017 9:29 AM, Long Nguyen wrote: >> Hi, >> >> Have you had time to look into the patch? >> >> Best regards, >> Long Nguyen. >> >> On 4/28/2017 11:12 AM, Long H Buu Nguyen wrote: >>> Summary: amf: send oper_state when NCS SUs already instantiated [#2443] >>> Review request for Ticket(s): 2443 >>> Peer Reviewer(s): AMF devs >>> Pull request to: AMF maintainers >>> Affected branch(es): develop, release >>> Development branch: ticket-2443 >>> Base revision: 94fe6f2ca5c34bafc86f001807ea08ce39f60a34 >>> Personal repository: git://git.code.sf.net/u/xlobung/review >>> >>> >>> Impacted area Impact y/n >>> >>> Docsn >>> Build systemn >>> RPM/packaging n >>> Configuration files n >>> Startup scripts n >>> SAF servicesn >>> OpenSAF servicesy >>> Core libraries n >>> Samples n >>> Tests n >>> Other n >>> >>> >>> Comments (indicate scope for each "y" above): >>> - >>> Assume after headless, SC-1 becomes ACTIVE. Amfnd in SC-2 sends >>> a node_up >>> message to amfd-SC-1. amfnd-SC-2 will instantiate NCS SUs in SC-2 >>> as soon >>> as amfd-SC-1 receives the node_up message. At the time NCS SUs in >>> SC-2 >>> are INSTANTIATED, if SC-1 is rebooted, amfnd-SC-2 receives >>> NEW_ACTIVE >>> because amfd-SC-2 is set to ACTIVE by RDE. amfnd-SC-2 sends a >>> node_up >>> message to amfd-SC-2. Later, amfnd-SC-2 continues to instantiate >>> NCS SUs >>> in SC-2. However, the NCS SUs in SC-2 are already INSTANTIATED. >>> amfnd-SC-2 >>> does not send oper_state message to amfd-SC-2 because the NCS SU >>> presence >>> states do not change. As a result, amf does not continue with the >>> normal >>> startup process. 
>>> >>> revision 01dc86166f3ed1b9b46534092089d5bcfaf1ef57 >>> Author:Long H Buu Nguyen >>> Date:Thu, 27 Apr 2017 19:39:09 +0700 >>> >>> amf: send oper_state when NCS SUs already instantiated [#2443] >>> >>> >>> >>> Complete diffstat: >>> -- >>> src/amf/amfnd/susm.cc | 10 ++ >>> 1 file changed, 10 insertions(+) >>> >>> >>> Testing Commands: >>> - >>> As described in the ticket. >>> >>> >>> Testing, Expected Results: >>> -- >>> Opensaf starts successfully. >>> >>> >>> Conditions of Submission: >>> - >>> Ack'ed from reviewers. >>> >>> >>> Arch Built StartedLinux distro >>> --- >>> mipsn n >>> mips64 n n >>> x86 n n >>> x86_64 y y >>> powerpc n n >>> powerpc64 n n >>> >>> >>> Reviewer Checklist: >>> --- >>> [Submitters: make sure that your review doesn't trigger any checkmarks!] >>> >>> >>> Your checkin has not passed review because (see checked entries): >>> >>> ___ Your RR template is generally incomplete; it has too many blank >>> entries >>> that need proper data filled in. >>> >>> ___ You have failed to nominate the proper persons for review and push. >>> >>> ___ Your patches do not have proper short+long header >>> >>> ___ You have grammar/spelling in your header that is unacceptable. >>> >>> ___ You have exceeded a sensible line length in your >>> headers/comments/text. >>> >>> ___ You have failed to put in a proper Trac Ticket # into your commits. >>> >>> ___ You have incorrectly put/left internal data in your comments/files >>> (i.e. internal bug tracking tool IDs, product names etc) >>> >>> ___ You have not given any evidence of testing beyond basic build tests. >>> Demonstrate some level of runtime or other sanity testing. >>> >>> ___ You have ^M present in some of your files. These have to be removed. >>> >>> ___ You have needlessly changed whitespace or added whitespace crimes >>> like trailing spaces, or spaces before tabs. >>> >>> ___ You have mixed real technical changes with whitespace and other >>> cosmetic code cleanup changes. 
These have to be separate commits. >>> >>> ___ You need to refactor your submission into logical chunks; there is >>> too much content into a single commit. >>> >>> ___ You have extraneous garbage in your review (merge commits etc) >>> >>> ___ You have giant attachments which should never have been sent; >>> Instead you should place your content in a public tree to be >>> pulled. >>> >>> ___ You have too many commits attached to an e-mail; resend as threaded >>> commits, or place in a public tree for a pull. >>> >>> ___ You have resent this content multiple times wi
Re: [devel] [PATCH 1/1] amfd: make auto repair restriction configurable [#2435]
On 26-Apr-17 3:25 PM, Gary Lee wrote: > Hi Praveen > > From talking with some SMF maintainers, some applications could be using > saAmfSUMaintenanceCampaign even though AMF does not. How it is exposed to the application? I guess an SMF application can register for SMF callback SaSmfCampaignCallbackT only which has nothing to do with setting and unsetting in AMF. Also AMF does not send campaign name in notifications before 5.2. Thanks, Praveen > So in terms of backwards compatibility, it is better to put this > configuration in AMF instead. > > Thanks > Gary > > -----Original Message- > From: praveen malviya > Organization: Oracle Corporation > Date: Wednesday, 26 April 2017 at 7:47 pm > To: gary > Cc: > Subject: Re: [PATCH 1/1] amfd: make auto repair restriction configurable > [#2435] > > Hi Gary, > > If I understand, before 5.2, while running campaign SMF used to set > saAmfSUMaintenanceCampaign attribute in affected SUs using CCB > operations. Since AMF feature "Restrictions to auto repair" was not > implemented (implemented in #2144, 5.2), AMF was still taking actions if > components faults while campaign is running and also su maintenance > related notifications were not generated. With 2144 in 5.2 release, SMF > is still setting the saAmfSUMaintenanceCampaign. But if some faults > happens now, AMF will be taking action and also it sends su maintenance > related notification. > > I guess before 5.2 release SMF was just setting and unsetting > saAmfSUMaintenanceCampaign without any other use as #2144 was not > implemented before 5.2? If it is so, it means no application and even > SMF itself does not track this attribute value before 5.2 other than > setting and unsetting? Based on this one solution could be: if SMF > skips the step/command of setting saAmfSUMaintenanceCampaign based on a > new attribute in class SaSmfCampaign. One object of this class is > created for each campaign before starting the campaign. 
> > Note:All Non-spec configuration attributes are named as "osafAmf*" in > AMF. > > > Thanks > Praveen > > On 21-Apr-17 3:21 PM, Gary Lee wrote: > > This adds a configuration object for AMF at > amfConfig=1,safApp=safAmfService. > > > > A configuration attribute 'amfRestrictAutoRepairEnable' is added. > > This determines if 'suMaintenanceCampaign' should be ignored to > maintain > > legacy AMF behaviour. The default behaviour is not to support auto > repair > > restriction. > > > > To enable restriction: > > immcfg -a amfRestrictAutoRepairEnable=1 > amfConfig=1,safApp=safAmfService > > > > To disable restriction: > > immcfg -a amfRestrictAutoRepairEnable=0 > amfConfig=1,safApp=safAmfService > > --- > > src/amf/Makefile.am| 3 + > > src/amf/amfd/comp.cc | 2 +- > > src/amf/amfd/config.cc | 179 > + > > src/amf/amfd/config.h | 21 + > > src/amf/amfd/imm.cc| 35 ++-- > > src/amf/amfd/ndproc.cc | 4 +- > > src/amf/amfd/node.cc | 4 +- > > src/amf/amfd/sgproc.cc | 14 ++-- > > src/amf/amfd/su.cc | 37 +++-- > > src/amf/amfd/su.h | 3 +- > > src/amf/common/amf_defs.h | 3 + > > src/amf/config/amf_classes.xml | 15 > > src/amf/config/amf_objects.xml | 7 ++ > > 13 files changed, 300 insertions(+), 27 deletions(-) > > create mode 100644 src/amf/amfd/config.cc > > create mode 100644 src/amf/amfd/config.h > > > > diff --git a/src/amf/Makefile.am b/src/amf/Makefile.am > > index 8c175c2..1d6ca60 100644 > > --- a/src/amf/Makefile.am > > +++ b/src/amf/Makefile.am > > @@ -103,6 +103,7 @@ noinst_HEADERS += \ > >src/amf/amfd/clm.h \ > >src/amf/amfd/cluster.h \ > >src/amf/amfd/comp.h \ > > + src/amf/amfd/config.h \ > >src/amf/amfd/csi.h \ > >src/amf/amfd/def.h \ > >src/amf/amfd/evt.h \ > > @@ -213,6 +214,7 @@ bin_testamfd_LDFLAGS = \ > >src/amf/amfd/bin_osafamfd-ckpt_updt.o \ > >src/amf/amfd/bin_osafamfd-clm.o \ > >src/amf/amfd/bin_osafamfd-cluster.o \ > > +
Re: [devel] [PATCH 1/1] amfd: make auto repair restriction configurable [#2435]
Hi Gary, If I understand, before 5.2, while running campaign SMF used to set saAmfSUMaintenanceCampaign attribute in affected SUs using CCB operations. Since AMF feature "Restrictions to auto repair" was not implemented (implemented in #2144, 5.2), AMF was still taking actions if components faults while campaign is running and also su maintenance related notifications were not generated. With 2144 in 5.2 release, SMF is still setting the saAmfSUMaintenanceCampaign. But if some faults happens now, AMF will be taking action and also it sends su maintenance related notification. I guess before 5.2 release SMF was just setting and unsetting saAmfSUMaintenanceCampaign without any other use as #2144 was not implemented before 5.2? If it is so, it means no application and even SMF itself does not track this attribute value before 5.2 other than setting and unsetting? Based on this one solution could be: if SMF skips the step/command of setting saAmfSUMaintenanceCampaign based on a new attribute in class SaSmfCampaign. One object of this class is created for each campaign before starting the campaign. Note:All Non-spec configuration attributes are named as "osafAmf*" in AMF. Thanks Praveen On 21-Apr-17 3:21 PM, Gary Lee wrote: > This adds a configuration object for AMF at amfConfig=1,safApp=safAmfService. > > A configuration attribute 'amfRestrictAutoRepairEnable' is added. > This determines if 'suMaintenanceCampaign' should be ignored to maintain > legacy AMF behaviour. The default behaviour is not to support auto repair > restriction. 
> > To enable restriction: > immcfg -a amfRestrictAutoRepairEnable=1 amfConfig=1,safApp=safAmfService > > To disable restriction: > immcfg -a amfRestrictAutoRepairEnable=0 amfConfig=1,safApp=safAmfService > --- > src/amf/Makefile.am| 3 + > src/amf/amfd/comp.cc | 2 +- > src/amf/amfd/config.cc | 179 > + > src/amf/amfd/config.h | 21 + > src/amf/amfd/imm.cc| 35 ++-- > src/amf/amfd/ndproc.cc | 4 +- > src/amf/amfd/node.cc | 4 +- > src/amf/amfd/sgproc.cc | 14 ++-- > src/amf/amfd/su.cc | 37 +++-- > src/amf/amfd/su.h | 3 +- > src/amf/common/amf_defs.h | 3 + > src/amf/config/amf_classes.xml | 15 > src/amf/config/amf_objects.xml | 7 ++ > 13 files changed, 300 insertions(+), 27 deletions(-) > create mode 100644 src/amf/amfd/config.cc > create mode 100644 src/amf/amfd/config.h > > diff --git a/src/amf/Makefile.am b/src/amf/Makefile.am > index 8c175c2..1d6ca60 100644 > --- a/src/amf/Makefile.am > +++ b/src/amf/Makefile.am > @@ -103,6 +103,7 @@ noinst_HEADERS += \ > src/amf/amfd/clm.h \ > src/amf/amfd/cluster.h \ > src/amf/amfd/comp.h \ > + src/amf/amfd/config.h \ > src/amf/amfd/csi.h \ > src/amf/amfd/def.h \ > src/amf/amfd/evt.h \ > @@ -213,6 +214,7 @@ bin_testamfd_LDFLAGS = \ > src/amf/amfd/bin_osafamfd-ckpt_updt.o \ > src/amf/amfd/bin_osafamfd-clm.o \ > src/amf/amfd/bin_osafamfd-cluster.o \ > + src/amf/amfd/bin_osafamfd-config.o \ > src/amf/amfd/bin_osafamfd-comp.o \ > src/amf/amfd/bin_osafamfd-compcstype.o \ > src/amf/amfd/bin_osafamfd-comptype.o \ > @@ -300,6 +302,7 @@ bin_osafamfd_SOURCES = \ > src/amf/amfd/comp.cc \ > src/amf/amfd/compcstype.cc \ > src/amf/amfd/comptype.cc \ > + src/amf/amfd/config.cc \ > src/amf/amfd/csi.cc \ > src/amf/amfd/csiattr.cc \ > src/amf/amfd/cstype.cc \ > diff --git a/src/amf/amfd/comp.cc b/src/amf/amfd/comp.cc > index d4b51a6..3e0dc5d 100644 > --- a/src/amf/amfd/comp.cc > +++ b/src/amf/amfd/comp.cc > @@ -152,7 +152,7 @@ void > AVD_COMP::avd_comp_pres_state_set(SaAmfPresenceStateT pres_state) { > (saAmfCompPresenceState == 
SA_AMF_PRESENCE_TERMINATION_FAILED)) || > ((node->saAmfNodeFailfastOnInstantiationFailure == true) && > (saAmfCompPresenceState == SA_AMF_PRESENCE_INSTANTIATION_FAILED))) > && > - (su->saAmfSUMaintenanceCampaign.empty())) { > + (su->restrict_auto_repair() == false)) { > saflog(LOG_NOTICE, amfSvcUsrName, "%s PresenceState %s => %s", > osaf_extended_name_borrow(&comp_info.name), > avd_pres_state_name[old_state], avd_pres_state_name[pres_state]); > diff --git a/src/amf/amfd/config.cc b/src/amf/amfd/config.cc > new file mode 100644 > index 000..bdb07d2 > --- /dev/null > +++ b/src/amf/amfd/config.cc > @@ -0,0 +1,179 @@ > +#include "amf/amfd/util.h" > +#include "amf/common/amf_util.h" > +#include "amf/amfd/imm.h" > +#include "amf/amfd/node.h" > +#include "amf/amfd/config.h" > + > +static Configuration _configuration; > +Configuration *configuration = &_configuration; > + > +static void ccb_apply_modify_hdlr(struct CcbUtilOperationData *opdata) { > + TRACE_ENTER(); > + const SaImmAttrModificationT_2 *attr_mod; > + i
Re: [devel] [PATCH 1/1] samples: fix $piddir undefined in amf_demo_script [#2410]
Ack, code review only. Thanks, praveen On 12-Apr-17 6:06 PM, Nguyen Luu wrote: > The $piddir variable (containing path to amf_demo comp's pid file) > is not defined in amf_demo_script. > > This could lead to the amf_demo process not getting truly killed > in some cases when cleanup is called (e.g. when invoking > saAmfComponentErrorReport()), leaving the process unmanaged by AMF. > --- > samples/amf/sa_aware/amf_demo_script | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/samples/amf/sa_aware/amf_demo_script > b/samples/amf/sa_aware/amf_demo_script > index eb61ac0..a033f53 100755 > --- a/samples/amf/sa_aware/amf_demo_script > +++ b/samples/amf/sa_aware/amf_demo_script > @@ -34,6 +34,7 @@ fi > # Source LSB functions library > . /lib/lsb/init-functions > > +piddir="/tmp" > compname=`echo $SA_AMF_COMPONENT_NAME | md5sum | awk '{print $1}'` > pidfile="$piddir/${compname}.pid" > > -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1 of 1] clms: return TIME_OUT for unlock op if CLMS update to CLM agent fails [#2381]
On 11-Apr-17 9:03 PM, Anders Widell wrote: > A question: > > I noticed that clms_send_is_member_info() is called twice from > clms_imm_node_unlock(), but you only modified the first call (the one > without PLM). Shouldn't the PLM case also be updated? Is a similar > change also applicable in clms_imm_node_shutdown? > Yes, it is needed at-least for PLM case in clms_imm_node_unlock() and may be other places also. But in the reported problem, there is no clue in CLMD traces why CLMS is not able to send message to this client. Also there is not MDS_DOWN for that client. This client was created by this CLMS when it was in check-pointing. I have reproduced issue in one way as given in the comment. But in this case CLMS is getting MDS_DOWN event. So this is not the original case reported in the description. Due to this, I did not go for generic fix. Thanks Praveen > regards, > > Anders Widell > > > On 04/03/2017 10:09 AM, praveen.malv...@oracle.com wrote: >> src/clm/clmd/clms_evt.c | 2 +- >> src/clm/clmd/clms_imm.c | 11 +-- >> src/clm/clmd/clms_mbcsv.c | 2 +- >> src/clm/clmd/clms_util.c | 16 ++-- >> 4 files changed, 17 insertions(+), 14 deletions(-) >> >> >> In this problem, first user performs CLM lock operation on payload and >> restarts it immediately. >> When node was joining, user performs UNLOCK operation on it. Operation >> gets timed out. >> After this, CLM rejects any admin operation on this payload with >> BAD_OP indicating >> that an admin operation is already going on. >> >> For unlock operation when CLM tries to send membership status to >> clients on the node being >> unlocked, MDS returns failure for a client. CLMS does not continue >> with remaining clients and it does >> not reply to IMM client also. This the reason of unlocked opreration >> getting timed out. Also >> CLM does not clear internal parameter related to admin operation. Due >> to this subsequent admin >> operationa on this node are rejected with BAD_OP. 
There is no clue in >> traces why MDS returned failure. >> >> Generally this can happen when CLMS is trying to send message to a >> bunch of clients and >> one of them goes down. Since CLMS has not processed this DOWN event, >> it will try to send >> message to this cleint for which MDS will return failure. Currently >> issue is reproduced >> on this basis. This patch is based on this. With the patch CLM will >> send membership status to remaining >> clients even if MDS returns failure for a client. Also in such a >> situation, CLMS will return >> TIMEOUT to the user and it will clear internal admin op params so that >> subsequent operation will >> continue. >> >> diff --git a/src/clm/clmd/clms_evt.c b/src/clm/clmd/clms_evt.c >> --- a/src/clm/clmd/clms_evt.c >> +++ b/src/clm/clmd/clms_evt.c >> @@ -137,7 +137,7 @@ CLMS_CLIENT_INFO *clms_client_get_by_id( >> rec = (CLMS_CLIENT_INFO *) >> ncs_patricia_tree_get(&clms_cb->client_db, (uint8_t *)&client_id_net); >> if (NULL == rec) >> -TRACE("client_id: %u lookup failed", client_id); >> +TRACE("client_id: %u not found", client_id); >> return rec; >> } >> diff --git a/src/clm/clmd/clms_imm.c b/src/clm/clmd/clms_imm.c >> --- a/src/clm/clmd/clms_imm.c >> +++ b/src/clm/clmd/clms_imm.c >> @@ -2221,16 +2221,15 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST >> { >> uint32_t rc = NCSCC_RC_SUCCESS; >> TRACE_ENTER2("Node name %s to unlock", nodeop->node_name.value); >> - >> +SaAisErrorT ais_rc = SA_AIS_OK; >> if (nodeop->admin_state == SA_CLM_ADMIN_UNLOCKED) { >> -LOG_ER("Node is already in an unlocked state"); >> +LOG_NO("Node is already in an unlocked state"); >> nodeop->admin_op = 0; >> >> (void)immutil_saImmOiAdminOperationResult(clms_cb->immOiHandle, >> nodeop->curr_admin_inv, >> SA_AIS_ERR_NO_OP); >> rc = NCSCC_RC_FAILURE; >> goto done; >> } >> - >> if (((nodeop->admin_state == SA_CLM_ADMIN_LOCKED) || >> (nodeop->admin_state == SA_CLM_ADMIN_SHUTTING_DOWN))) { >> if (clms_cb->reg_with_plm == SA_FALSE) { >> @@ -2259,9 +2258,9 @@ 
uint32_t clms_imm_node_unlock(CLMS_CLUST >> clms_node_join_ntf(clms_cb, nodeop); >> rc = clms_send_is_member_info(clms_cb, >> nodeop->node_id, nodeop->member, true); >> -if(rc != NCSCC_RC_SUCCESS) { >> +if (rc != NCSCC_RC_SUCCESS) { >> TRACE("clms_send_is_member_info failed %u", rc); >> -goto done; >> +ais_rc = SA_AIS_ERR_TIMEOUT; >> } >> nodeop->change = SA_CLM_NODE_NO_CHANGE; >> } >> @@ -2322,7 +2321,7 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST >> nodeop->admin_op = 0; >> /* Send node join notification */ >> -(void)immutil_saImmOiAdminOperationResult(clms_cb->immOiHandle, >> nodeop->cur
[devel] [PATCH 0 of 1] Review Request for clm: add tool commands clm-adm, clm-state, clm-find [#2394]
Summary: clm: add tool commands clm-adm, clm-state, clm-find [#2394] Review request for Trac Ticket(s): #2394 Peer Reviewer(s): Anders Pull request to: <> Affected branch(es): Default Development branch: <> Impacted area Impact y/n Docs n Build system n RPM/packaging n Configuration files n Startup scripts n SAF services n OpenSAF services n Core libraries n Samples n Tests n Other y Comments (indicate scope for each "y" above): - changeset f4676a9743119edbd642debfe79f3ca43bbb8a47 Author: Praveen Malviya Date: Wed, 05 Apr 2017 17:56:44 +0530 clm: add tool commands clm-adm, clm-state, clm-find [#2394] clm-adm: for performing CLM admin operations on CLM node and cluster. clm-find: for finding out object(s) configured for CLM classes SaClmCluster and SaClmNode. Additional options like membership status and admin state can also be given to find out objects which satisfy these criteria. clm-state: for listing CLM node(s) and their important attributes. Added Files: src/clm/tools/clm-adm src/clm/tools/clm-find src/clm/tools/clm-state Complete diffstat: -- opensaf.spec.in |3 ++ src/clm/Makefile.am |5 src/clm/tools/clm-adm | 67 + src/clm/tools/clm-find | 83 ++ src/clm/tools/clm-state | 107 ++ 5 files changed, 265 insertions(+), 0 deletions(-) Testing Commands: - #clm-find node unlocked safNode=PL-4,safCluster=myClmCluster safNode=PL-5,safCluster=myClmCluster safNode=PL-6,safCluster=myClmCluster safNode=SC-1,safCluster=myClmCluster safNode=SC-2,safCluster=myClmCluster safNode=SC-3,safCluster=myClmCluster #clm-state all adm safNode=PL-4,safCluster=myClmCluster saClmNodeAdminState=UNLOCKED(1) safNode=PL-5,safCluster=myClmCluster saClmNodeAdminState=UNLOCKED(1) safNode=PL-6,safCluster=myClmCluster saClmNodeAdminState=UNLOCKED(1) safNode=SC-1,safCluster=myClmCluster saClmNodeAdminState=UNLOCKED(1) safNode=SC-2,safCluster=myClmCluster saClmNodeAdminState=UNLOCKED(1) safNode=SC-3,safCluster=myClmCluster saClmNodeAdminState=UNLOCKED(1) Testing, Expected Results: -- Already pasted out 
above. Conditions of Submission: - Ack from reviewer. Arch Built StartedLinux distro --- mipsn n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multiple times without a clear indication of what has changed between each re-send. 
___ You have failed to adequately and individually address all of the comments and change requests that were proposed in the initial review. _
[devel] [PATCH 1 of 1] clm: add tool commands clm-adm, clm-state, clm-find [#2394]
opensaf.spec.in |3 + src/clm/Makefile.am |5 ++ src/clm/tools/clm-adm | 67 ++ src/clm/tools/clm-find | 83 + src/clm/tools/clm-state | 107 5 files changed, 265 insertions(+), 0 deletions(-) clm-adm: for performing CLM admin operations on CLM node and cluser. clm-find: for finding out object(s) configured for CLM classes SaClmCluster and SaClmNode. Additional options like membership status and admin state can also be given to find out objects which satisfy this criteria. clm-state: for listing CLM node(s) and their important attributes. diff --git a/opensaf.spec.in b/opensaf.spec.in --- a/opensaf.spec.in +++ b/opensaf.spec.in @@ -1444,6 +1444,9 @@ fi %{_bindir}/ntfsubscribe %{_bindir}/ntfread %{_bindir}/saflogger +%{_bindir}/clm-adm +%{_bindir}/clm-find +%{_bindir}/clm-state %if %is_ais_smf %{_bindir}/smf-adm %{_bindir}/smf-find diff --git a/src/clm/Makefile.am b/src/clm/Makefile.am --- a/src/clm/Makefile.am +++ b/src/clm/Makefile.am @@ -166,6 +166,11 @@ bin_osafclmna_LDADD = \ lib/libSaAmf.la \ lib/libopensaf_core.la +dist_bin_SCRIPTS += \ + src/clm/tools/clm-adm \ + src/clm/tools/clm-find \ + src/clm/tools/clm-state + if ENABLE_TESTS bin_PROGRAMS += bin/clmtest diff --git a/src/clm/tools/clm-adm b/src/clm/tools/clm-adm new file mode 100644 --- /dev/null +++ b/src/clm/tools/clm-adm @@ -0,0 +1,67 @@ +#! /bin/sh +# -*- OpenSAF -*- +# +# Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed +# under the GNU Lesser General Public License Version 2.1, February 1999. +# The complete license can be accessed from the following location: +# http://opensource.org/licenses/lgpl-license.php +# See the Copying file included with the OpenSAF distribution for full +# licensing terms. 
+# + +usage() +{ + echo "" + echo "usage: $(basename $0) [-t timeout] " + echo "" + echo "-t: command timeout in seconds (default=60)" + echo "Note: specify options in same order as given in usage." + echo "" +} + +if [ $# -ne 2 -a $# -ne 4 ]; then + usage + exit 1 +fi + +if [ $# -eq 4 ]; then + if [ "$1" = "-t" ]; then +CMD=$3 +args="$1 $2 $4" + else +echo "Invalid option or command as not per usage" +usage +exit 1 + fi +elif [ $# -eq 2 ]; then + CMD=$1 + args=$2 +fi + +UNLOCK=1 +LOCK=2 +SHUTDOWN=3 +RESET=4 +case $CMD in + "unlock") + immadm -o $UNLOCK "$args" + ;; + "lock") + immadm -o $LOCK "$args" + ;; + "shutdown") + immadm -o $SHUTDOWN "$args" + ;; + "reset") + immadm -o $RESET "$args" + ;; + *) + echo "invalid admin operation: $CMD" + exit 1 +esac +exit $? + diff --git a/src/clm/tools/clm-find b/src/clm/tools/clm-find new file mode 100644 --- /dev/null +++ b/src/clm/tools/clm-find @@ -0,0 +1,83 @@ +#! /bin/sh + +# -*- OpenSAF -*- +# +# Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed +# under the GNU Lesser General Public License Version 2.1, February 1999. +# The complete license can be accessed from the following location: +# http://opensource.org/licenses/lgpl-license.php +# See the Copying file included with the OpenSAF distribution for full +# licensing terms. +# + +usage() +{ + echo "" + echo "usage: $(basename "$0") []" + echo "" + echo " cluster|node: CLM class names" + echo " locked|unlocked|member|nonmember: list nodes with this status." 
+ echo "" +} + +if [ $# -ne 1 -a $# -ne 2 ] || [ "$1" = "-h" -o "$1" = "--help" ]; then + usage + exit 1 +fi + +if [ $# -eq 2 -a "$1" = "cluster" ]; then + usage + exit 1 +fi + +if [ $# -eq 2 -a "$1" = "node" -a "$2" != "locked" -a "$2" != "unlocked" -a "$2" != "member" -a "$2" != "nonmember" ] +then + usage + exit 1 +fi + +CLASS="" +case "$1" in + cluster) +CLASS="SaClmCluster" +;; + node) +CLASS="SaClmNode" +;; + *) +usage +exit 1 +esac + +list_with_states () +{ + for i in $(immfind -c "$1"); do +if [ "$2" = "locked" -o "$2" = "unlocked" ]; then + value=$(immlist -a "saClmNodeAdminState" "$i" | cut -d = -f2) + if [ "$2" = "locked" -a "$value" -eq 2 ] || [ "$2" = "unlocked" -a "$value" -eq 1 ]; then +echo "$i" + fi +else + value=$(immlist -a "saClmNodeIsMember" "$i" | cut -d = -f2) + if [ "$value" = "" ]; then +
Re: [devel] [PATCH 1 of 1] amfd: correct loop variable initialization [#2404]
Ack, code review only. Thanks Praveen On 05-Apr-17 11:31 AM, nagendr...@oracle.com wrote: > src/amf/amfd/su.cc | 3 ++- > 1 files changed, 2 insertions(+), 1 deletions(-) > > > diff --git a/src/amf/amfd/su.cc b/src/amf/amfd/su.cc > --- a/src/amf/amfd/su.cc > +++ b/src/amf/amfd/su.cc > @@ -1423,9 +1423,10 @@ static SaAisErrorT su_rt_attr_cb(SaImmOi > } > rc = > avd_saImmOiRtObjectUpdate_multival_sync(obj_name, attributeName, > SA_IMM_ATTR_SANAMET, temp, > assigned_si); > + j = 0; > for (AVD_SU_SI_REL *susi = su->list_of_susi; > susi != nullptr; susi = susi->su_next) { > - j = 0; > osaf_extended_name_free(siName + j); > + j = j + 1; > } > delete [] siName; > delete [] attrValues; > -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1 of 1] amfd: ignore node_up if node state is not absent [#2400]
Ack, code review only. Thanks Praveen On 03-Apr-17 6:53 AM, Gary Lee wrote: > src/amf/amfd/ndfsm.cc | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > > > if a duplicate node_up is processed after cluster startup timer is finished, > don't send a reboot order > > diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc > --- a/src/amf/amfd/ndfsm.cc > +++ b/src/amf/amfd/ndfsm.cc > @@ -428,7 +428,7 @@ void avd_node_up_evh(AVD_CL_CB *cb, AVD_ > LOG_WA("Sending node reboot order to node:%s, due to > first node_up_msg after node sync window", > > osaf_extended_name_borrow(&n2d_msg->msg_info.n2d_node_up.node_name)); > avnd->reboot = true; > - } else if (cb->init_state == AVD_APP_STATE) { > + } else if (cb->init_state == AVD_APP_STATE && avnd->node_state > == AVD_AVND_STATE_ABSENT) { > LOG_WA("Sending node reboot order to node:%s, due to > late node_up_msg after cluster startup timeout", > > osaf_extended_name_borrow(&n2d_msg->msg_info.n2d_node_up.node_name)); > avnd->reboot = true; > -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
[devel] [PATCH 1 of 1] clms: return TIME_OUT for unlock op if CLMS update to CLM agent fails [#2381]
src/clm/clmd/clms_evt.c | 2 +- src/clm/clmd/clms_imm.c | 11 +-- src/clm/clmd/clms_mbcsv.c | 2 +- src/clm/clmd/clms_util.c | 16 ++-- 4 files changed, 17 insertions(+), 14 deletions(-) In this problem, first user performs CLM lock operation on payload and restarts it immediately. When node was joining, user performs UNLOCK operation on it. Operation gets timed out. After this, CLM rejects any admin operation on this payload with BAD_OP indicating that an admin operation is already going on. For unlock operation when CLM tries to send membership status to clients on the node being unlocked, MDS returns failure for a client. CLMS does not continue with remaining clients and it does not reply to IMM client also. This is the reason of unlock operation getting timed out. Also CLM does not clear internal parameter related to admin operation. Due to this subsequent admin operations on this node are rejected with BAD_OP. There is no clue in traces why MDS returned failure. Generally this can happen when CLMS is trying to send message to a bunch of clients and one of them goes down. Since CLMS has not processed this DOWN event, it will try to send message to this client for which MDS will return failure. Currently issue is reproduced on this basis. This patch is based on this. With the patch CLM will send membership status to remaining clients even if MDS returns failure for a client. Also in such a situation, CLMS will return TIMEOUT to the user and it will clear internal admin op params so that subsequent operation will continue. 
diff --git a/src/clm/clmd/clms_evt.c b/src/clm/clmd/clms_evt.c --- a/src/clm/clmd/clms_evt.c +++ b/src/clm/clmd/clms_evt.c @@ -137,7 +137,7 @@ CLMS_CLIENT_INFO *clms_client_get_by_id( rec = (CLMS_CLIENT_INFO *) ncs_patricia_tree_get(&clms_cb->client_db, (uint8_t *)&client_id_net); if (NULL == rec) - TRACE("client_id: %u lookup failed", client_id); + TRACE("client_id: %u not found", client_id); return rec; } diff --git a/src/clm/clmd/clms_imm.c b/src/clm/clmd/clms_imm.c --- a/src/clm/clmd/clms_imm.c +++ b/src/clm/clmd/clms_imm.c @@ -2221,16 +2221,15 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST { uint32_t rc = NCSCC_RC_SUCCESS; TRACE_ENTER2("Node name %s to unlock", nodeop->node_name.value); - + SaAisErrorT ais_rc = SA_AIS_OK; if (nodeop->admin_state == SA_CLM_ADMIN_UNLOCKED) { - LOG_ER("Node is already in an unlocked state"); + LOG_NO("Node is already in an unlocked state"); nodeop->admin_op = 0; (void)immutil_saImmOiAdminOperationResult(clms_cb->immOiHandle, nodeop->curr_admin_inv, SA_AIS_ERR_NO_OP); rc = NCSCC_RC_FAILURE; goto done; } - if (((nodeop->admin_state == SA_CLM_ADMIN_LOCKED) || (nodeop->admin_state == SA_CLM_ADMIN_SHUTTING_DOWN))) { if (clms_cb->reg_with_plm == SA_FALSE) { @@ -2259,9 +2258,9 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST clms_node_join_ntf(clms_cb, nodeop); rc = clms_send_is_member_info(clms_cb, nodeop->node_id, nodeop->member, true); - if(rc != NCSCC_RC_SUCCESS) { + if (rc != NCSCC_RC_SUCCESS) { TRACE("clms_send_is_member_info failed %u", rc); - goto done; + ais_rc = SA_AIS_ERR_TIMEOUT; } nodeop->change = SA_CLM_NODE_NO_CHANGE; } @@ -2322,7 +2321,7 @@ uint32_t clms_imm_node_unlock(CLMS_CLUST nodeop->admin_op = 0; /* Send node join notification */ - (void)immutil_saImmOiAdminOperationResult(clms_cb->immOiHandle, nodeop->curr_admin_inv, SA_AIS_OK); + (void)immutil_saImmOiAdminOperationResult(clms_cb->immOiHandle, nodeop->curr_admin_inv, ais_rc); clms_node_admin_state_change_ntf(clms_cb, nodeop, SA_CLM_ADMIN_UNLOCKED); done: TRACE_LEAVE(); 
diff --git a/src/clm/clmd/clms_mbcsv.c b/src/clm/clmd/clms_mbcsv.c --- a/src/clm/clmd/clms_mbcsv.c +++ b/src/clm/clmd/clms_mbcsv.c @@ -103,7 +103,7 @@ static CLMS_CKPT_HDLR ckpt_data_handler[ static uint32_t ckpt_proc_cluster_rec(CLMS_CB * cb, CLMS_CKPT_REC * data) { CLMSV_CKPT_CLUSTER_INFO *param = &data->param.cluster_rec; - + TRACE_ENTER(); osaf_cluster->num_nodes = param->num_nodes; osaf_cluster->init_time = param->init_time; cb->cluster_view_num = param->cluster_view_num; diff --git a/src/clm/clmd/clms_util.c b/src/clm/clmd/clms_util.c --- a/src/clm/clmd/clms_util.c +++ b/src/clm/clmd/clms_util.c @@ -396,6 +396,7 @@ SaClmClusterNotificationT_4 *clms_notbuf if (num == 0) {
[devel] [PATCH 0 of 1] Review Request for clms: return TIME_OUT for unlock op if CLMS update to CLM agent fails [#2381].
Summary: clms: return TIME_OUT for unlock op if CLMS update to CLM agent fails [#2381]. Review request for Trac Ticket(s): #2381 Peer Reviewer(s): Anders Pull request to: <> Affected branch(es): ALL Development branch: <> Impacted area Impact y/n Docs n Build system n RPM/packaging n Configuration files n Startup scripts n SAF services y OpenSAF services n Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): - changeset 50f1f31babcc78c50b413497d91497266385b997 Author: Praveen Malviya Date: Mon, 03 Apr 2017 12:47:47 +0530 clms: return TIME_OUT for unlock op if CLMS update to CLM agent fails [#2381]. In this problem, first user performs CLM lock operation on payload and restarts it immediately. When node was joining, user performs UNLOCK operation on it. Operation gets timed out. After this, CLM rejects any admin operation on this payload with BAD_OP indicating that an admin operation is already going on. For unlock operation when CLM tries to send membership status to clients on the node being unlocked, MDS returns failure for a client. CLMS does not continue with remaining clients and it does not reply to IMM client also. This is the reason the unlock operation gets timed out. Also CLM does not clear internal parameter related to admin operation. Due to this, subsequent admin operations on this node are rejected with BAD_OP. There is no clue in traces why MDS returned failure. Generally this can happen when CLMS is trying to send message to a bunch of clients and one of them goes down. Since CLMS has not processed this DOWN event, it will try to send message to this client for which MDS will return failure. Currently issue is reproduced on this basis. This patch is based on this. With the patch CLM will send membership status to remaining clients even if MDS returns failure for a client. 
Also in such a situation, CLMS will return TIMEOUT to the user and it will clear internal admin op params so that subsequent operation will continue. Complete diffstat: -- src/clm/clmd/clms_evt.c | 2 +- src/clm/clmd/clms_imm.c | 11 +-- src/clm/clmd/clms_mbcsv.c | 2 +- src/clm/clmd/clms_util.c | 16 ++-- 4 files changed, 17 insertions(+), 14 deletions(-) Testing Commands: - 1)Tested the case commented in the ticket. 2)LOCK, SHUTDOWN and UNLOCK operation. Testing, Expected Results: -- CLMS will accept admin operation on node after unlock. Conditions of Submission: - Ack from reviewer. Arch Built StartedLinux distro --- mipsn n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. 
___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ Y
[devel] [PATCH 1 of 1] amf: fix track callback when multiple CLM nodes leaves membership[#2372]
src/amf/amfd/clm.cc | 43 ++- src/amf/amfnd/clm.cc | 6 -- 2 files changed, 34 insertions(+), 15 deletions(-) In reported issue, two CLM nodes are locked simultaneously. For one of the nodes, CLM lock gets timed out and user gets REPAIR_PENDING as return code. The two payloads being locked host amf_demo with 2N model. When AMFD gets CLM track callback for PL-3 it starts terminating amf_demo on PL-3. While termination of amf_demo is still going on, user clm locks PL-4 and AMF gets another track callback with rootCauseEntity as PL-4. Callback contains information of PL-3 also as this node is still in pending change phase. AMFD starts terminating amf_demo on PL-4 but at the same time it incorrectly responds for PL-3 with invocationId of PL-4 callback. CLM assumes that for PL-4 change_started completed and sends completion callback for PL-4. In this callback, AMF clears internal flags which monitor the graceful removal of nodes. Since AMF never responds for PL-3 callback, node lock timer expires in CLMD and it sends complete callback to AMF and responds to user with REPAIR_PENDING. AMF thinks this is the case of nodefailover and tries to failover PL-3. Patch fixes this problem in both AMFD and AMFND: -to act on CHANGE_START step only once for a node (amfd). -to act on COMPLETE step only when rootCauseEntity matches and if it is graceful removal of node(amfd). -to act only once in track callback for COMPLETE step(amfnd). 
diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc --- a/src/amf/amfd/clm.cc +++ b/src/amf/amfd/clm.cc @@ -203,6 +203,7 @@ static void clm_node_exit_complete(SaClm avd_node_failover(node); m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node, AVSV_CKPT_AVD_NODE_CONFIG); + node->clm_change_start_preceded = false; done: TRACE_LEAVE(); @@ -218,7 +219,7 @@ static void clm_track_cb(const SaClmClus AVD_AVND *node; TRACE_ENTER2("'%llu' '%u' '%u'", invocation, step, error); - + if (error != SA_AIS_OK) { LOG_ER("ClmTrackCallback received in error"); goto done; @@ -232,10 +233,13 @@ static void clm_track_cb(const SaClmClus ** The CLM cluster can be larger than the AMF cluster thus it is not an ** error if the corresponding AMF node cannot be found. */ + TRACE("numberOfMembers:'%u', numberOfItems:'%u'", numberOfMembers, + notificationBuffer->numberOfItems); for (i = 0; i < notificationBuffer->numberOfItems; i++) { notifItem = ¬ificationBuffer->notification[i]; const std::string node_name(Amf::to_string(¬ifItem->clusterNode.nodeName)); + TRACE("i=%u, node:'%s', clusterChange:%u",i, node_name.c_str(), notifItem->clusterChange); switch(step) { case SA_CLM_CHANGE_VALIDATE: if(notifItem->clusterChange == SA_CLM_NODE_LEFT) { @@ -264,6 +268,10 @@ static void clm_track_cb(const SaClmClus } if ( notifItem->clusterChange == SA_CLM_NODE_LEFT || notifItem->clusterChange == SA_CLM_NODE_SHUTDOWN ) { + if (node->clm_change_start_preceded == true) { + TRACE_3("Already got callback for start of this change."); + continue; + } /* invocation to be used by pending clm response */ node->clm_pend_inv = invocation; clm_node_exit_start(node, notifItem->clusterChange); @@ -298,25 +306,34 @@ static void clm_track_cb(const SaClmClus } clm_node_exit_complete(notifItem->clusterNode.nodeId); } else if (strncmp(osaf_extended_name_borrow(rootCauseEntity), "safNode=", 8) == 0) { + const std::string rootCause_clm_node(Amf::to_string(rootCauseEntity)); /* This callback is because of operation on CLM.*/ - if(true 
== node->clm_change_start_preceded) { + if (true == node->clm_change_start_preceded) { /* We have got a completed callback with start cbk step before, so already locking applications might have been done. So, no action - is needed.*/ - node->clm_change_start_preceded = false; - node->node_info.member = SA_FALSE; - m_AVSV_SEND_CKPT_UPDT_ASYNC_UPD
[devel] [PATCH 0 of 1] Review Request for amf: fix track callback when multiple CLM nodes leaves membership[#2372].
Summary: amf: fix track callback when multiple CLM nodes leaves membership[#2372]. Review request for Trac Ticket(s): #2372 Peer Reviewer(s): AMF devs Pull request to: <> Affected branch(es): ALL Development branch: <> Impacted area Impact y/n Docs n Build system n RPM/packaging n Configuration files n Startup scripts n SAF services y OpenSAF services n Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): - changeset 1ee79821742a117265da9a4d5ba60617ac86e2e4 Author: Praveen Malviya Date: Mon, 27 Mar 2017 15:25:18 +0530 amf: fix track callback when multiple CLM nodes leaves membership[#2372]. In reported issue, two CLM nodes are locked simultaneously. For one of the nodes, CLM lock gets timed out and user gets REPAIR_PENDING as return code. The two payloads being locked host amf_demo with 2N model. When AMFD gets CLM track callback for PL-3 it starts terminating amf_demo on PL-3. While termination of amf_demo is still going on, user clm locks PL-4 and AMF gets another track callback with rootCauseEntity as PL-4. Callback contains information of PL-3 also as this node is still in pending change phase. AMFD starts terminating amf_demo on PL-4 but at the same time it incorrectly responds for PL-3 with invocationId of PL-4 callback. CLM assumes that for PL-4 change_started completed and sends completion callback for PL-4. In this callback, AMF clears internal flags which monitor the graceful removal of nodes. Since AMF never responds for PL-3 callback, node lock timer expires in CLMD and it sends complete callback to AMF and responds to user with REPAIR_PENDING. AMF thinks this is the case of nodefailover and tries to failover PL-3. Patch fixes this problem in both AMFD and AMFND: -to act on CHANGE_START step only once for a node (amfd). -to act on COMPLETE step only when rootCauseEntity matches and if it is graceful removal of node(amfd). -to act only once in track callback for COMPLETE step(amfnd). 
Complete diffstat: -- src/amf/amfd/clm.cc | 43 ++- src/amf/amfnd/clm.cc | 6 -- 2 files changed, 34 insertions(+), 15 deletions(-) Testing Commands: - tested both the cases mentioned in the ticket. Testing, Expected Results: -- Both the cases passed. Conditions of Submission: - Ack from any reviewer. Arch Built StartedLinux distro --- mipsn n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. 
___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multiple times without a clear indication of what has changed between each re-send. ___ You have failed to adequately and individually address all of
Re: [devel] [PATCH 0 of 3] Review Request for amf: Fix all Cppcheck 1.77 issues [#2341] V3
Hi Mahesh, 5.2RC2 tag is planned on 2017-03-24. I will be reviewing after tagging on Monday. Thanks, Praveen On 22-Mar-17 1:41 PM, A V Mahesh wrote: > Hi AMF dev, > > A gentle reminder for review. > > So far I have received comments from Gary. If you guys don't have any > other comments, I will push by tomorrow EOD. > > -AVM > > > On 3/20/2017 10:49 AM, A V Mahesh wrote: >> Summary:amf: Fix all Cppcheck 1.77 issues [#2341] V3 >> Review request for Trac Ticket(s): #2341 >> Peer Reviewer(s): AMF dev >> Pull request to: <> >> Affected branch(es): default >> Development branch: default >> >> >> Impacted area Impact y/n >> >> Docs n >> Build system n >> RPM/packaging n >> Configuration files n >> Startup scripts n >> SAF services n >> OpenSAF services y >> Core libraries n >> Samples n >> Tests n >> Other n >> >> >> Comments (indicate scope for each "y" above): >> - >> >> changeset efb3364a0779447b858cbd0cdae1b92f0a2d2716 >> Author:A V Mahesh >> Date:Mon, 20 Mar 2017 10:33:35 +0530 >> >> amfd: Fix all Cppcheck 1.77 issues [#2341] V3 >> >> V3 fixed review comments. >> >> [src/amf/amfd/app.cc:285]: (style) The scope of the variable 'i' >> can be >> reduced. [src/amf/amfd/apptype.cc:137]: (style) Condition 'rc!=0' >> is always >> false [src/amf/amfd/apptype.cc:89] -> [src/amf/amfd/apptype.cc:84]: >> (warning, inconclusive) Either the condition >> '(attr=attributes[i++])!=nullptr' is redundant or there is >> possible null >> pointer dereference: attr. [src/amf/amfd/apptype.cc:129] -> >> [src/amf/amfd/apptype.cc:124]: (warning, inconclusive) Either the >> condition >> '(attr=attributes[i++])!=nullptr' is redundant or there is >> possible null >> pointer dereference: attr. [src/amf/amfd/apptype.cc:69]: (style) >> The scope >> of the variable 'sg_type' can be reduced. >> [src/amf/amfd/chkop.cc:1297] -> >> [src/amf/amfd/chkop.cc:1302]: (style) Variable 'uba' is reassigned >> a value >> before the old one has been used. 
[src/amf/amfd/ckpt_dec.cc:374] -> >> [src/amf/amfd/ckpt_dec.cc:382]: (style) Variable 'status' is >> reassigned a >> value before the old one has been used. >> [src/amf/amfd/ckpt_dec.cc:573] -> >> [src/amf/amfd/ckpt_dec.cc:577]: (style) Variable 'status' is >> reassigned a >> value before the old one has been used. >> [src/amf/amfd/ckpt_dec.cc:1109]: >> (performance) Prefer prefix ++/-- operators for non-primitive types. >> [src/amf/amfd/ckpt_edu.cc:51] -> [src/amf/amfd/ckpt_edu.cc:56]: >> (style) >> Variable 'rc' is reassigned a value before the old one has been used. >> [src/amf/amfd/ckpt_enc.cc:2281] -> >> [src/amf/amfd/ckpt_enc.cc:2288]: (style) >> Variable 'status' is reassigned a value before the old one has >> been used. >> [src/amf/amfd/ckpt_enc.cc:2314] -> >> [src/amf/amfd/ckpt_enc.cc:2322]: (style) >> Variable 'status' is reassigned a value before the old one has >> been used. >> [src/amf/amfd/ckpt_enc.cc:1951]: (performance) Prefer prefix ++/-- >> operators >> for non-primitive types. [src/amf/amfd/ckpt_enc.cc:1982]: >> (performance) >> Prefer prefix ++/-- operators for non-primitive types. >> [src/amf/amfd/ckpt_enc.cc:2015]: (performance) Prefer prefix ++/-- >> operators >> for non-primitive types. [src/amf/amfd/ckpt_enc.cc:2044]: >> (performance) >> Prefer prefix ++/-- operators for non-primitive types. >> [src/amf/amfd/ckpt_enc.cc:2076]: (performance) Prefer prefix ++/-- >> operators >> for non-primitive types. [src/amf/amfd/ckpt_enc.cc:2111]: >> (performance) >> Prefer prefix ++/-- operators for non-primitive types. >> [src/amf/amfd/ckpt_enc.cc:2151]: (performance) Prefer prefix ++/-- >> operators >> for non-primitive types. [src/amf/amfd/ckpt_enc.cc:2176]: >> (performance) >> Prefer prefix ++/-- operators for non-primitive types. >> [src/amf/amfd/ckpt_enc.cc:2216]: (performance) Prefer prefix ++/-- >> operators >> for non-primitive types. [src/amf/amfd/ckpt_enc.cc:2252]: >> (performance) >> Prefer prefix ++/-- operators for non-primitive types. 
>> [src/amf/amfd/ckpt_enc.cc:2470]: (performance) Prefer prefix ++/-- >> operators >> for non-primitive types. [src/amf/amfd/clm.cc:452] -> >> [src/amf/amfd/clm.cc:456]: (style, inconclusive) Variable 'error' is >> reassigned a value before the old one has been used if variable is no >> semaphore variable. [src/amf/amfd/clm.cc:473] -> >> [src/amf/amfd/clm.cc:475]: >> (style, inconclusive) Variable 'error' is reassigned a value >> before the old >> one has been used if variable is no semaphore variable. >> [src/amf/amfd/clm.cc:344]: (performance) Prefer prefix ++/-- >> oper
[devel] [PATCH 0 of 1] Review Request for amfd: choose CLM unlocked spare controller for standby role in failover situation[#2387]
Summary: amfd: choose CLM unlocked spare controller for standby role in failover situation[#2387] Review request for Trac Ticket(s): #2387 Peer Reviewer(s): AMF devs Pull request to: <> Affected branch(es): ALL Development branch: <> Impacted area Impact y/n Docs n Build system n RPM/packaging n Configuration files n Startup scripts n SAF services y OpenSAF services n Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): - changeset 9d28b2e0bba4e479bc65c0df6d55d6cc3f71ecd4 Author: Praveen Malviya Date: Tue, 21 Mar 2017 15:06:48 +0530 amfd: choose CLM unlocked spare controller for standby role in failover situation[#2387] When spare controllers are configured in cluster, AMF is choosing CLM locked controller for fresh standby controller during failover situation. Currently fresh standby assignment on CLM locked controller in failover situation fails because of issue in SMF #1791. Even if SMF issue is fixed, AMF may choose a CLM locked controller for fresh assignment. This will prevent a user from using si-swap operation for controller swap. If available, AMF must choose CLM unlocked spare controller for fresh standby assignments. Complete diffstat: -- src/amf/amfd/clm.cc | 2 ++ src/amf/amfd/sg_2n_fsm.cc | 12 +++- 2 files changed, 13 insertions(+), 1 deletions(-) Testing Commands: - Brought 5 controllers up in UML environment. CLM lock of SC-3. Stop opensaf on active controller. Testing, Expected Results: -- AMF does not choose CLM locked spare controller for fresh standby assignments. Conditions of Submission: - Ack from reviewers. Arch Built Started Linux distro --- mips n n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. 
___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multiple times without a clear indication of what has changed between each re-send. ___ You have failed to adequately and individually address all of the comments and change requests that were proposed in the initial review. ___ You have a misconfigured ~/.hgrc file (i.e. username, email etc) ___ Your computer have a badly configured date and time; confusing the the threaded patch review. ___ Your changes affect IPC mechanism, and you don't present any results for in-service upgradability test. 
___ Your changes affect user manual and documentation, your patch series do not contain the patch that updates the Doxygen manual. -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Open
[devel] [PATCH 1 of 1] amfd: choose CLM unlocked spare controller for standby role in failover situation[#2387]
src/amf/amfd/clm.cc | 2 ++ src/amf/amfd/sg_2n_fsm.cc | 12 +++- 2 files changed, 13 insertions(+), 1 deletions(-) When spare controllers are configured in cluster, AMF is choosing CLM locked controller for fresh standby controller during failover situation. Currently fresh standby assignment on CLM locked controller in failover situation fails because of issue in SMF #1791. Even if SMF issue is fixed, AMF may choose a CLM locked controller for fresh assignment. This will prevent a user from using si-swap operation for controller swap. If available, AMF must choose CLM unlocked spare controller for fresh standby assignments. diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc --- a/src/amf/amfd/clm.cc +++ b/src/amf/amfd/clm.cc @@ -202,6 +202,7 @@ static void clm_node_exit_complete(SaClm } avd_node_failover(node); + m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node, AVSV_CKPT_AVD_NODE_CONFIG); done: TRACE_LEAVE(); @@ -304,6 +305,7 @@ static void clm_track_cb(const SaClmClus is needed.*/ node->clm_change_start_preceded = false; node->node_info.member = SA_FALSE; + m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node, AVSV_CKPT_AVD_NODE_CONFIG); } else { diff --git a/src/amf/amfd/sg_2n_fsm.cc b/src/amf/amfd/sg_2n_fsm.cc --- a/src/amf/amfd/sg_2n_fsm.cc +++ b/src/amf/amfd/sg_2n_fsm.cc @@ -659,7 +659,17 @@ static AVD_SU *avd_sg_2n_su_chose_asgn(A for (const auto& iter : sg->list_of_su) { if (iter->saAmfSuReadinessState == SA_AMF_READINESS_IN_SERVICE && iter->list_of_susi == AVD_SU_SI_REL_NULL) { - s_su = iter; + + /* Assign standby for MW SU on CLM enabled node. If not available + then choose based on rank(list_of_su is based on rank).*/ + if (s_su == nullptr) + s_su = iter; + if (iter->sg_of_su->sg_ncs_spec == true) { + if (iter->su_on_node->node_info.member == SA_FALSE) + continue; + else + s_su = iter; + } break; } } -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! 
http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1 of 1] amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268]
On 21-Mar-17 12:57 PM, Gary Lee wrote: > Hi Praveen > > I think it might be nicer to reduce the 2 else branches (ie. count > > si->saAmfSINumCurrActiveAssignments and count < > si->saAmfSINumCurrActiveAssignments) to a single else block, with a comment > saying adjustments are required. > The two else branches are required because in one case si->saAmfSINumCurrActiveAssignments is updated before actually adjusting the assignment and in other case after adjusting the assignments to avoid asserts in the code and also for fresh assignments. > Thanks > Gary > > -----Original Message- > From: praveen malviya > Organization: Oracle Corporation > Date: Tuesday, 21 March 2017 at 4:49 pm > To: gary , , > , > Cc: > Subject: Re: [PATCH 1 of 1] amfd: remove assignments from lower ranked SU > while adjusting SI assignments [#2268] > > Hi Gary, > > Thanks for the reviewing the patch. > For readability I had kept separate handling for new definition in the > if block. > > Now I will push patch with below refactoring : > /* Check if we need to readjust the SI assignments as > PrefActiveAssignments > got changed */ > uint32_t count = mod_pref_assignments; > if (mod_pref_assignments == 0) { > //Zero is set for using PrefAssignedSus > as default arguments. 
> count = > si->sg_of_si->pref_assigned_sus(); > } > if (count == > si->saAmfSINumCurrActiveAssignments ) { > TRACE("Assignments are equal updating > the SI status "); > si->saAmfSIPrefActiveAssignments = > mod_pref_assignments; > } else if (count > > si->saAmfSINumCurrActiveAssignments) { > si->saAmfSIPrefActiveAssignments = > mod_pref_assignments; > si->adjust_si_assignments(count); > } else if (count < > si->saAmfSINumCurrActiveAssignments) { > si->adjust_si_assignments(count); > si->saAmfSIPrefActiveAssignments = > mod_pref_assignments; > } > TRACE("Modified saAmfSIPrefActiveAssignments is > '%u'", si->saAmfSIPrefActiveAssignments); > si->update_ass_state(); > > > Thanks, > Praveen > > On 21-Mar-17 7:50 AM, Gary Lee wrote: > > Hi Praveen > > > > Ack (review only + regression tests run) with minor comment below. > > > > Thanks > > Gary > > > > -Original Message- > > From: > > Date: Friday, 17 March 2017 at 8:22 pm > > To: , , gary > , > > Cc: > > Subject: [PATCH 1 of 1] amfd: remove assignments from lower ranked SU > while adjusting SI assignments [#2268] > > > > src/amf/amfd/si.cc | 92 > +++--- > > 1 files changed, 46 insertions(+), 46 deletions(-) > > > > > > In N-Way Active mode, when saAmfSIPrefActiveAssignments is reduced, > > AMFD removes assignments from higher ranked SU when siranked su is > not configured and lower > > ranked SU have assignments. > > Similar issue in N-Way model when SiPrefStandbyAssignment is > reduced. Also AMFD is not > > checking HA state of susi and tries to delete active susi and > crashes. > > > > Patch fixes the problem by removing assignments from lower ranked > SU. 
> > > > diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc > > --- a/src/amf/amfd/si.cc > > +++ b/src/amf/amfd/si.cc > > @@ -1052,8 +1052,7 @@ done: > > */ > > void AVD_SI::adjust_si_assignments(const uint32_t > mod_pref_assignments) > > { > > - AVD_SU_SI_REL *sisu, *tmp_sisu; > > - uint32_t no_of_sisus_to_delete; > > + AVD_SU_SI_REL *sisu; > > uint32_t i = 0; > > > > TRACE_ENTER2("for SI:%s ", name.c_str()); > > @@ -1073,31 +1072,24 @@ void AVD_SI::adjust_si_assignments(const > > TR
Re: [devel] [PATCH 1 of 1] amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268]
Hi Gary, Thanks for the reviewing the patch. For readability I had kept separate handling for new definition in the if block. Now I will push patch with below refactoring : /* Check if we need to readjust the SI assignments as PrefActiveAssignments got changed */ uint32_t count = mod_pref_assignments; if (mod_pref_assignments == 0) { //Zero is set for using PrefAssignedSus as default arguments. count = si->sg_of_si->pref_assigned_sus(); } if (count == si->saAmfSINumCurrActiveAssignments ) { TRACE("Assignments are equal updating the SI status "); si->saAmfSIPrefActiveAssignments = mod_pref_assignments; } else if (count > si->saAmfSINumCurrActiveAssignments) { si->saAmfSIPrefActiveAssignments = mod_pref_assignments; si->adjust_si_assignments(count); } else if (count < si->saAmfSINumCurrActiveAssignments) { si->adjust_si_assignments(count); si->saAmfSIPrefActiveAssignments = mod_pref_assignments; } TRACE("Modified saAmfSIPrefActiveAssignments is '%u'", si->saAmfSIPrefActiveAssignments); si->update_ass_state(); Thanks, Praveen On 21-Mar-17 7:50 AM, Gary Lee wrote: > Hi Praveen > > Ack (review only + regression tests run) with minor comment below. > > Thanks > Gary > > -Original Message- > From: > Date: Friday, 17 March 2017 at 8:22 pm > To: , , gary > , > Cc: > Subject: [PATCH 1 of 1] amfd: remove assignments from lower ranked SU while > adjusting SI assignments [#2268] > > src/amf/amfd/si.cc | 92 > +++--- > 1 files changed, 46 insertions(+), 46 deletions(-) > > > In N-Way Active mode, when saAmfSIPrefActiveAssignments is reduced, > AMFD removes assignments from higher ranked SU when siranked su is not > configured and lower > ranked SU have assignments. > Similar issue in N-Way model when SiPrefStandbyAssignment is reduced. > Also AMFD is not > checking HA state of susi and tries to delete active susi and crashes. > > Patch fixes the problem by removing assignments from lower ranked SU. 
> > diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc > --- a/src/amf/amfd/si.cc > +++ b/src/amf/amfd/si.cc > @@ -1052,8 +1052,7 @@ done: > */ > void AVD_SI::adjust_si_assignments(const uint32_t mod_pref_assignments) > { > - AVD_SU_SI_REL *sisu, *tmp_sisu; > - uint32_t no_of_sisus_to_delete; > + AVD_SU_SI_REL *sisu; > uint32_t i = 0; > > TRACE_ENTER2("for SI:%s ", name.c_str()); > @@ -1073,31 +1072,24 @@ void AVD_SI::adjust_si_assignments(const > TRACE("No New assignments are been done SI:%s", > name.c_str()); > } > } else { > - no_of_sisus_to_delete = saAmfSINumCurrActiveAssignments > - > - mod_pref_assignments; > - > - /* Get the sisu pointer from the si->list_of_sisu list > from which > - no of sisus need to be deleted based on SI ranked SU */ > - sisu = tmp_sisu = list_of_sisu; > - for( i = 0; i < no_of_sisus_to_delete && nullptr != > tmp_sisu; i++ ) { > - tmp_sisu = tmp_sisu->si_next; > + if (list_of_sisu == nullptr) > + return; > + /* > +avd_susi_create() keeps sisus in list_of_sisu in > order from highest > +ranked to lowest ranked. > +Keep mod_pref_assignments in list_of_sisu from > beginning and delete others. > + */ > + sisu = list_of_sisu; > + for( i = 0; ((i < mod_pref_assignments) && (sisu != > nullptr)); i++ ) { > + sisu = sisu->si_next; > } > - while( tmp_sisu && (tmp_sisu->si_next != nullptr) ) { > - sisu = sisu->si_next; > - tmp_sisu = tmp_sisu->si_next; > - } > - > - for( i = 0; i < no_of_sisus_to_delete && (nullptr != > sisu); i++ ) { > - /* Send quiesced request for the sisu that > needs tobe deleted */ > + for( ; sisu != nullptr; sisu = sisu->si_next) { > if (avd_susi_mod_send(sisu, SA_AMF_HA_QUIESCED) > == NCSCC_RC_SUCCESS) { > -
Re: [devel] [PATCH 1 of 1] amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268]
Hi All, Please review this patch and provide your feedback. I want to push it on RC2. Thanks, Praveen On 17-Mar-17 2:52 PM, praveen.malv...@oracle.com wrote: > src/amf/amfd/si.cc | 92 > +++--- > 1 files changed, 46 insertions(+), 46 deletions(-) > > > In N-Way Active mode, when saAmfSIPrefActiveAssignments is reduced, > AMFD removes assignments from higher ranked SU when siranked su is not > configured and lower > ranked SU have assignments. > Similar issue in N-Way model when SiPrefStandbyAssignment is reduced. Also > AMFD is not > checking HA state of susi and tries to delete active susi and crashes. > > Patch fixes the problem by removing assignments from lower ranked SU. > > diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc > --- a/src/amf/amfd/si.cc > +++ b/src/amf/amfd/si.cc > @@ -1052,8 +1052,7 @@ done: > */ > void AVD_SI::adjust_si_assignments(const uint32_t mod_pref_assignments) > { > - AVD_SU_SI_REL *sisu, *tmp_sisu; > - uint32_t no_of_sisus_to_delete; > + AVD_SU_SI_REL *sisu; > uint32_t i = 0; > > TRACE_ENTER2("for SI:%s ", name.c_str()); > @@ -1073,31 +1072,24 @@ void AVD_SI::adjust_si_assignments(const > TRACE("No New assignments are been done SI:%s", > name.c_str()); > } > } else { > - no_of_sisus_to_delete = saAmfSINumCurrActiveAssignments > - > - mod_pref_assignments; > - > - /* Get the sisu pointer from the si->list_of_sisu list > from which > - no of sisus need to be deleted based on SI ranked SU */ > - sisu = tmp_sisu = list_of_sisu; > - for( i = 0; i < no_of_sisus_to_delete && nullptr != > tmp_sisu; i++ ) { > - tmp_sisu = tmp_sisu->si_next; > + if (list_of_sisu == nullptr) > + return; > + /* > +avd_susi_create() keeps sisus in list_of_sisu in > order from highest > +ranked to lowest ranked. > +Keep mod_pref_assignments in list_of_sisu from > beginning and delete others. 
> + */ > + sisu = list_of_sisu; > + for( i = 0; ((i < mod_pref_assignments) && (sisu != > nullptr)); i++ ) { > + sisu = sisu->si_next; > } > - while( tmp_sisu && (tmp_sisu->si_next != nullptr) ) { > - sisu = sisu->si_next; > - tmp_sisu = tmp_sisu->si_next; > - } > - > - for( i = 0; i < no_of_sisus_to_delete && (nullptr != > sisu); i++ ) { > - /* Send quiesced request for the sisu that > needs tobe deleted */ > + for( ; sisu != nullptr; sisu = sisu->si_next) { > if (avd_susi_mod_send(sisu, SA_AMF_HA_QUIESCED) > == NCSCC_RC_SUCCESS) { > - /* Add SU to su_opr_list */ > avd_sg_su_oper_list_add(avd_cb, > sisu->su, false); > } > - sisu = sisu->si_next; > } > - /* Change the SG FSM to AVD_SG_FSM_SG_REALIGN if > assignment is sent.*/ > - if (i > 0) > - sg_of_si->set_fsm_state(AVD_SG_FSM_SG_REALIGN); > + /* Change the SG FSM to AVD_SG_FSM_SG_REALIGN as > assignment is sent.*/ > + sg_of_si->set_fsm_state(AVD_SG_FSM_SG_REALIGN); > } > } > if( sg_of_si->sg_redundancy_model == SA_AMF_N_WAY_REDUNDANCY_MODEL ) { > @@ -1107,30 +1099,28 @@ void AVD_SI::adjust_si_assignments(const > LOG_ER("SI new assignmemts failed SI:%s", > name.c_str()); > } > } else { > - no_of_sisus_to_delete = 0; > - no_of_sisus_to_delete = > saAmfSINumCurrStandbyAssignments - > - mod_pref_assignments; > - > - /* Get the sisu pointer from the si->list_of_sisu list > from which > - no of sisus need to be deleted based on SI ranked SU */ > - sisu = tmp_sisu = list_of_sisu; > - for(i = 0; i < no_of_sisus_to_delete && (nullptr != > tmp_sisu); i++) { > - tmp_sisu = tmp_sisu->si_next; > + if (list_of_sisu == nullptr) > + return; > + /* > +avd_susi_create() keeps sisus in list_of_sisu in > order from highest > +ranked to lowest ranked. > +
[devel] [PATCH 0 of 1] Review Request for amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268]
Summary: amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268] Review request for Trac Ticket(s): #2268 Peer Reviewer(s): AMF devs Pull request to: <> Affected branch(es): ALL Development branch: <> Impacted area Impact y/n Docsn Build systemn RPM/packaging n Configuration files n Startup scripts n SAF servicesy OpenSAF servicesn Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): - changeset 2d69e15801b87ac788b64a8287353616fd7e0c66 Author: Praveen Malviya Date: Fri, 17 Mar 2017 14:48:12 +0530 amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268] In N-Way Active mode, when saAmfSIPrefActiveAssignments is reduced, AMFD removes assignments from higher ranked SU when siranked su is not configured and lower ranked SU have assignments. Similar issue in N-Way model when SiPrefStandbyAssignment is reduced. Also AMFD is not checking HA state of susi and tries to delete active susi and crashes. Patch fixes the problem by removing assignments from lower ranked SU. Complete diffstat: -- src/amf/amfd/si.cc | 92 ++-- 1 files changed, 46 insertions(+), 46 deletions(-) Testing Commands: - Tested both N-WAY model and N-Way model configurations by adjsuting SI assignments params when SI has ranked SU configured and not configured. Also tested configuration without siranked su having ranks in SU. Testing, Expected Results: -- AMF removes assignments from lower ranked SUs or sus having lower ranks. Conditions of Submission: - Ack from any reviewer before RC2. Arch Built StartedLinux distro --- mipsn n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. 
___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multiple times without a clear indication of what has changed between each re-send. ___ You have failed to adequately and individually address all of the comments and change requests that were proposed in the initial review. ___ You have a misconfigured ~/.hgrc file (i.e. username, email etc) ___ Your computer have a badly configured date and time; confusing the the threaded patch review. ___ Your changes affect IPC mechanism, and you don't present any results for in-service upgradability test. 
___ Your changes affect user manual and documentation, your patch series do not contain the patch that updates the Doxygen manual. -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _
[devel] [PATCH 1 of 1] amfd: remove assignments from lower ranked SU while adjusting SI assignments [#2268]
src/amf/amfd/si.cc | 92 +++--- 1 files changed, 46 insertions(+), 46 deletions(-) In N-Way Active mode, when saAmfSIPrefActiveAssignments is reduced, AMFD removes assignments from higher ranked SU when siranked su is not configured and lower ranked SU have assignments. Similar issue in N-Way model when SiPrefStandbyAssignment is reduced. Also AMFD is not checking HA state of susi and tries to delete active susi and crashes. Patch fixes the problem by removing assignments from lower ranked SU. diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc --- a/src/amf/amfd/si.cc +++ b/src/amf/amfd/si.cc @@ -1052,8 +1052,7 @@ done: */ void AVD_SI::adjust_si_assignments(const uint32_t mod_pref_assignments) { - AVD_SU_SI_REL *sisu, *tmp_sisu; - uint32_t no_of_sisus_to_delete; + AVD_SU_SI_REL *sisu; uint32_t i = 0; TRACE_ENTER2("for SI:%s ", name.c_str()); @@ -1073,31 +1072,24 @@ void AVD_SI::adjust_si_assignments(const TRACE("No New assignments are been done SI:%s", name.c_str()); } } else { - no_of_sisus_to_delete = saAmfSINumCurrActiveAssignments - - mod_pref_assignments; - - /* Get the sisu pointer from the si->list_of_sisu list from which - no of sisus need to be deleted based on SI ranked SU */ - sisu = tmp_sisu = list_of_sisu; - for( i = 0; i < no_of_sisus_to_delete && nullptr != tmp_sisu; i++ ) { - tmp_sisu = tmp_sisu->si_next; + if (list_of_sisu == nullptr) + return; + /* + avd_susi_create() keeps sisus in list_of_sisu in order from highest + ranked to lowest ranked. + Keep mod_pref_assignments in list_of_sisu from beginning and delete others. 
+*/ + sisu = list_of_sisu; + for( i = 0; ((i < mod_pref_assignments) && (sisu != nullptr)); i++ ) { + sisu = sisu->si_next; } - while( tmp_sisu && (tmp_sisu->si_next != nullptr) ) { - sisu = sisu->si_next; - tmp_sisu = tmp_sisu->si_next; - } - - for( i = 0; i < no_of_sisus_to_delete && (nullptr != sisu); i++ ) { - /* Send quiesced request for the sisu that needs tobe deleted */ + for( ; sisu != nullptr; sisu = sisu->si_next) { if (avd_susi_mod_send(sisu, SA_AMF_HA_QUIESCED) == NCSCC_RC_SUCCESS) { - /* Add SU to su_opr_list */ avd_sg_su_oper_list_add(avd_cb, sisu->su, false); } - sisu = sisu->si_next; } - /* Change the SG FSM to AVD_SG_FSM_SG_REALIGN if assignment is sent.*/ - if (i > 0) - sg_of_si->set_fsm_state(AVD_SG_FSM_SG_REALIGN); + /* Change the SG FSM to AVD_SG_FSM_SG_REALIGN as assignment is sent.*/ + sg_of_si->set_fsm_state(AVD_SG_FSM_SG_REALIGN); } } if( sg_of_si->sg_redundancy_model == SA_AMF_N_WAY_REDUNDANCY_MODEL ) { @@ -1107,30 +1099,28 @@ void AVD_SI::adjust_si_assignments(const LOG_ER("SI new assignmemts failed SI:%s", name.c_str()); } } else { - no_of_sisus_to_delete = 0; - no_of_sisus_to_delete = saAmfSINumCurrStandbyAssignments - - mod_pref_assignments; - - /* Get the sisu pointer from the si->list_of_sisu list from which - no of sisus need to be deleted based on SI ranked SU */ - sisu = tmp_sisu = list_of_sisu; - for(i = 0; i < no_of_sisus_to_delete && (nullptr != tmp_sisu); i++) { - tmp_sisu = tmp_sisu->si_next; + if (list_of_sisu == nullptr) + return; + /* + avd_susi_create() keeps sisus in list_of_sisu in order from highest + ranked to lowest ranked. + Keep mod_pref_assignments + active in list_of_sisu from beginning and delete others. +*/ + for (sisu = list_of_sisu; sisu != nullptr; sisu = sisu->si_next) { +
Re: [devel] Review Request for amf: Update PR/README for SC absence feature [#2179]
Hi Minh, Ack. I guess this is the same thing that was discussed during #1725 but in the context of headless. For normal cluster, issues related to missing IMM updates of run-time attributes and objects have been observed and reported but not the one in which assignment messages got missed. But whenever implemented, this needs to be handled in both SC Absence and Normal cluster in the same manner. One solution could be Active AMFD will run timer equal to the highest value of callbacktimeout in that assignment sent to AMFND. When active AMFD sends this assignment, it checkpoints assignment state. While decoding this state, standby AMFD can also run same timer. Now either AMFND will respond after collecting responses from components or it will send recovery request to AMFD if some assignment fails. For comp restart AMF will get presence state as update from that AMFND and it can restart the timer. If nothing comes then AMF can take action after timer expiry. If switchover/failover happens then standby is also running the timer. Thanks, Praveen On 16-Mar-17 11:38 AM, minh chau wrote: > Hi Praveen, > > Thanks for review, I have commented inline. > > * Escalation and Recovery during SC absence period: > -Restarts will work as normal, but failover or switchover will result in > Node > -Failfast. The repair action will be initiated when a SC returns if > -saAmfSGAutoRepair is enabled. > +Component and su restarts will work as normal. Any fail-over or > switch-over at > +component, su, and node level will only cleanup faulty components. > Recovery will > +be delayed until a SC returns: the fail-over or switch-over of SI > assignments > +will be initiated if saAmfSGAutoRepair is enabled, the node will be > reboot if > +saAmfNodeAutoRepair, aAmfNodeFailfastOnTerminationFailure, or > +saAmfNodeFailfastOnInstantiationFailure is enabled. > [Praveen] I think there is no dependency of failover and switchover of > assignments on saAmfSgAutoRepair. > Should the sentence be like this?
> " Recovery (failover or switchvoer of assignments) will be delayed > until a SC returns. > When first SC comes up after SC absebce state AMF will perform pending > repairs: > > [Minh]: This part is about escalation and recovery which is initiated by > su_oper message, it does depend on saAmfSgAutoRepair which is checked in > su_try_repair(), so I am not going to change the text > > +* Possible loss of RTA updates and SI assignment messages > +If both SCs go down abruptly (SCs are immediately powered-off for > instance), > +AMFD could fail to update RTA to IMM, the SI assignment messages sent from > +AMFND could not reach to AMFD, recovery could be impossible. > + > [Praveen] Should be mention the case of loss of assignment reseponse > from AMFND to AMFD? > Also I think we should mention impact of this loss, something like: > "In case of loss of RTA and SI assignments, AMF will not be able to > fully recover assignments. Thus application > may go in inconsistent state." > > [Minh]: I rewrites the text as: "If both SCs go down abruptly (SCs are > immediately powered-off for instance), AMFD could fail to update RTA to > IMM, the SI assignment request message sent from > AMFD could not reach to AMFND, or the SI assignment response message > sent from AMFND also could not reach to AMFD. In such cases, recovery > could be impossible, application may have inappropriate assignment states" > > One query: It's known in ticket #2210 that loss of mbcsv checkpoint in > sc failover in normal cluster can also happen as similar as loss of RTA > when both SCs go headless. For the loss of SI assignment messages, > although AMFD is using MDS in redundant view but the SI assignment is > not synchronization, I wonder if someone abruptly power off active > controller when active amfd is about receiving the assignment message, > or when amfnd just sends out the assignment response message but does > not reach to amfds? 
> > > > On 15/03/17 16:26, praveen malviya wrote: >> +saAmfNodeFailfastOnInstantiationFailure is enabled. >> [Praveen] I think there is no dependency of failover and switchover of >> assignments on saAmfSgAutoRepair. >> Should the sentence be like this? >> " Recovery (failover or switchover of assignments) will be delayed >> until a SC returns. >> When first SC comes up after SC absence state AMF will perform pending >> repairs: > -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] Review Request for amf: Update PR/README for SC absence feature [#2179]
Hi Minh, Ack with two comments in attached read me. Thanks, Praveen On 10-Mar-17 11:14 AM, minh chau wrote: Hi all, Please help to review documentation changes. Files are attached, they also can be found at below links: https://sourceforge.net/p/opensaf/tickets/_discuss/thread/342e9c61/8b9a/attachment/OpenSAF_AMF_PR_2179.odt https://sourceforge.net/p/opensaf/tickets/_discuss/thread/342e9c61/a94c/attachment/2179_README.diff Thanks, Minh diff --git a/src/amf/README_SC_ABSENCE b/src/amf/README_SC_ABSENCE --- a/src/amf/README_SC_ABSENCE +++ b/src/amf/README_SC_ABSENCE @@ -44,9 +44,12 @@ amfnd will not reboot the node and enter scAbsenceAllowed is configured) * Escalation and Recovery during SC absence period: -Restarts will work as normal, but failover or switchover will result in Node -Failfast. The repair action will be initiated when a SC returns if -saAmfSGAutoRepair is enabled. +Component and su restarts will work as normal. Any fail-over or switch-over at +component, su, and node level will only cleanup faulty components. Recovery will +be delayed until a SC returns: the fail-over or switch-over of SI assignments +will be initiated if saAmfSGAutoRepair is enabled, the node will be reboot if +saAmfNodeAutoRepair, aAmfNodeFailfastOnTerminationFailure, or +saAmfNodeFailfastOnInstantiationFailure is enabled. [Praveen] I think there is no dependecy of failover and switchover of assignents on saAmfSgAutoRepair. Should the sentence be like this? " Recovery (failover or switchvoer of assignments) will be delayed until a SC returns. When first SC comes up after SC absebce state AMF will perform pending repairs: -for sufailover recovery if saAmfSGAutoRepair is enabled. -for node-switchvoer and node failover recoveries if saAmfNodeAutoRepair is enabled. -for INST_FAILED and TERM_FAILED state if saAmfSGAutoRepair and saAmfNodeAutoRepair are enabled along with respective node level attributes saAmfNodeFailfastOnInstantiationFailure or saAmfNodeFailfastOnTerminationFailure. 
" . -for comp-failover recovery, amfnd will re-instantiate comp after assignments are switchovered. " * Amfnd detects return of SCs: NCSMDS_UP is the event that amfnd uses to detect the presence of an active amfd. @@ -76,16 +79,19 @@ absence recovery. The new attributes are Only 2N SG is currently supported for admin operation continuation. +* Node reboot during SC absence period: +The event of node reboot initiated by user during SC absence period +may lead to a loss of SI assignments. When a SC returns, AMF Director +will detect improper SI assignments and recover HA states of assignments. + LIMITATIONS --- -* While both SCs are absent, any failover or switchover escalation will result -in node failfast. The events of node reboot, node power off, and node failfast -will lead to a loss of SI assignments, which are not restored during the SC -absence period. The SI assignments may remain in improper states until a SC -comes back. Recovery of any lost SI assignments during SC absence period is -currently not supported. - +* Possible loss of RTA updates and SI assignment messages +If both SCs go down abruptly (SCs are immediately powered-off for instance), +AMFD could fail to update RTA to IMM, the SI assignment messages sent from +AMFND could not reach to AMFD, recovery could be impossible. + [Praveen] Should be mention the case of loss of assignment reseponse from AMFND to AMFD? Also I think we should mention impact of this loss, something like: "In case of loss of RTA and SI assignments, AMF will not be able to fully recover assignments. Thus application may go in inconsistent state." * SI dependency tolerance timer After a SC comes back, if an unassigned sponsor SI is detected, all its dependent SI(s) assignments are removed regardless of tolerance duration. The -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! 
http://sdm.link/slashdot___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1 of 1] fm: changing the log level from ER to WA [#2363]
Ack, code review only. Thanks, Praveen On 14-Mar-17 11:54 AM, ramesh.bet...@oracle.com wrote: > src/fm/fmd/fm_main.c | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > > > diff --git a/src/fm/fmd/fm_main.c b/src/fm/fmd/fm_main.c > --- a/src/fm/fmd/fm_main.c > +++ b/src/fm/fmd/fm_main.c > @@ -608,7 +608,7 @@ static void fm_mbx_msg_handler(FM_CB *fm >* (old-Active) is still in the progress of shutdown (i.e., > amfd/immd is still alive). >*/ > if ((fm_cb->role == PCS_RDA_ACTIVE) && (fm_cb->csi_assigned == > false)) { > - LOG_ER("Two active controllers observed in a cluster, > newActive: %x and old-Active: %x", fm_cb->node_id, fm_cb->peer_node_id); > + LOG_WA("Two active controllers observed in a cluster, > newActive: %x and old-Active: %x", fm_cb->node_id, fm_cb->peer_node_id); > opensaf_reboot(0, NULL, > "Received svc up from peer node (old-active is not > fully DOWN), hence rebooting the new Active"); > } > -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1 of 1] amf: print list of CLC CLI command env variables correctly [#2368]
Ack, code review only. Thanks, Praveen On 13-Mar-17 10:24 AM, Nguyen TK Luu wrote: > src/amf/amfnd/clc.cc | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > > > Due to a coding error, only the first env variable in the list > gets printed repeatedly for the length of the list. This ticket > fixes the bug. > > diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc > --- a/src/amf/amfnd/clc.cc > +++ b/src/amf/amfnd/clc.cc > @@ -3104,7 +3104,7 @@ uint32_t avnd_comp_clc_cmd_execute(AVND_ > > for(count=0;count<cmd_info.i_set_env_args->num_args;count++) > TRACE_1("CLC CLI command env variable name = '%s': value ='%s'", > - > cmd_info.i_set_env_args->env_arg->name,cmd_info.i_set_env_args->env_arg->value); > + > cmd_info.i_set_env_args->env_arg[count].name,cmd_info.i_set_env_args->env_arg[count].value); > > /* finally execute the command */ > rc = ncs_os_process_execute_timed(&cmd_info); > -- Announcing the Oxford Dictionaries API! The API offers world-renowned dictionary content that is easy and intuitive to access. Sign up for an account today to start using our lexical data to power your apps and projects. Get started today and enter our developer competition. http://sdm.link/oxford ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
[devel] [PATCH 1 of 1] amfd: honor PrefAssignedSU in N-Way and N-Way Active model during assignments [#2269]
src/amf/amfd/sg.cc | 46 +- src/amf/amfd/sg.h | 1 + src/amf/amfd/sg_nway_fsm.cc| 38 +++--- src/amf/amfd/sg_nwayact_fsm.cc | 26 ++- 4 files changed, 83 insertions(+), 28 deletions(-) SG attribute saAmfSGNumPrefAssignedSUs is applicable to N-Way and N-Way Active model. AMF is assigning more than saAmfSGNumPrefAssignedSUs in both N-Way and N-Way Active model. Patch fixes this problem. diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc --- a/src/amf/amfd/sg.cc +++ b/src/amf/amfd/sg.cc @@ -105,7 +105,7 @@ AVD_SG::AVD_SG(): saAmfSGAutoAdjust(SA_FALSE), saAmfSGNumPrefActiveSUs(0), saAmfSGNumPrefStandbySUs(0), - saAmfSGNumPrefInserviceSUs(~0), + saAmfSGNumPrefInserviceSUs(0), saAmfSGNumPrefAssignedSUs(0), saAmfSGMaxActiveSIsperSU(0), saAmfSGMaxStandbySIsperSU(0), @@ -941,16 +941,16 @@ static void ccb_apply_modify_hdlr(CcbUti TRACE("Modified saAmfSGNumPrefStandbySUs is '%u'", sg->saAmfSGNumPrefStandbySUs); } else if (!strcmp(attribute->attrName, "saAmfSGNumPrefInserviceSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefInserviceSUs = ~0; + sg->saAmfSGNumPrefInserviceSUs = 0; //default value for internal use. else sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value); - TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'", sg->saAmfSGNumPrefInserviceSUs); + TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'", sg->pref_inservice_sus()); } else if (!strcmp(attribute->attrName, "saAmfSGNumPrefAssignedSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefAssignedSUs = sg->saAmfSGNumPrefInserviceSUs; + sg->saAmfSGNumPrefAssignedSUs = 0; //default value for internal use. 
else sg->saAmfSGNumPrefAssignedSUs = *((SaUint32T *)value); - TRACE("Modified saAmfSGNumPrefAssignedSUs is '%u'", sg->saAmfSGNumPrefAssignedSUs); + TRACE("Modified saAmfSGNumPrefAssignedSUs is '%u'", sg->pref_assigned_sus()); } else if (!strcmp(attribute->attrName, "saAmfSGMaxActiveSIsperSU")) { if (value_is_deleted) sg->saAmfSGMaxActiveSIsperSU = -1; @@ -1043,10 +1043,10 @@ static void ccb_apply_modify_hdlr(CcbUti if (!strcmp(attribute->attrName, "saAmfSGNumPrefInserviceSUs")) { if (value_is_deleted) - sg->saAmfSGNumPrefInserviceSUs = ~0; + sg->saAmfSGNumPrefInserviceSUs = 0; else sg->saAmfSGNumPrefInserviceSUs = *((SaUint32T *)value); - TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'", sg->saAmfSGNumPrefInserviceSUs); + TRACE("Modified saAmfSGNumPrefInserviceSUs is '%u'", sg->pref_inservice_sus()); if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) { if (avd_sg_app_su_inst_func(avd_cb, sg) != NCSCC_RC_SUCCESS) { @@ -1209,7 +1209,7 @@ static void sg_app_sg_admin_unlock_inst( if (su->saAmfSUPreInstantiable == true) { if (su->su_on_node->node_state == AVD_AVND_STATE_PRESENT) { - if (su->sg_of_su->saAmfSGNumPrefInserviceSUs > su_try_inst) { + if (su->sg_of_su->pref_inservice_sus() > su_try_inst) { if (avd_snd_presence_msg(cb, su, false) != NCSCC_RC_SUCCESS) { LOG_NO("%s: Failed to send Instantiation order of '%s' to %x", __FUNCTION__, su->name.c_str(), @@ -1866,17 +1866,6 @@ void avd_sg_adjust_config(AVD_SG *sg) } } } - - /* adjust saAmfSGNumPrefAssignedSUs if not configured, only applicable for -* the N-way and N-way active redundancy models -*/ - if ((sg->saAmfSGNumPrefAssignedSUs == 0) && - ((sg->sg_type->saAmfSgtRedundancyModel == SA_AMF_N_WAY_REDUNDANCY_MODEL) || - (s
[devel] [PATCH 0 of 1] Review Request for amfd: honor PrefAssignedSU in N-Way and N-Way Active model during assignments [#2269].
Summary: amfd: honor PrefAssignedSU in N-Way and N-Way Active model during assignments [#2269]. Review request for Trac Ticket(s): #2269 Peer Reviewer(s): AMF devs Pull request to: <> Affected branch(es): ALL Development branch: <> Impacted area Impact y/n Docsn Build systemn RPM/packaging n Configuration files n Startup scripts n SAF servicesy OpenSAF servicesn Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): - changeset 62b35316b2e40dff6098f4385e2073f2f1e5a11b Author: Praveen Malviya Date: Fri, 10 Mar 2017 16:09:20 +0530 amfd: honor PrefAssignedSU in N-Way and N-Way Active model during assignments [#2269]. SG attribute saAmfSGNumPrefAssignedSUs is applicable to N-Way and N-Way Active model. AMF is assigning more than saAmfSGNumPrefAssignedSUs in both N-Way and N-Way Active model. Patch fixes this problem. Complete diffstat: -- src/amf/amfd/sg.cc | 46 +++--- src/amf/amfd/sg.h | 1 + src/amf/amfd/sg_nway_fsm.cc| 38 ++ src/amf/amfd/sg_nwayact_fsm.cc | 26 +- 4 files changed, 83 insertions(+), 28 deletions(-) Testing Commands: - Brought up N-Way and N-Way active models: 1)with siranked su configured. 2)with equal distribution enabled. Testing, Expected Results: -- PASS. AMF assigns only PrefAssignedSus. Conditions of Submission: - Ack from reviewers. Arch Built StartedLinux distro --- mipsn n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. 
___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multiple times without a clear indication of what has changed between each re-send. ___ You have failed to adequately and individually address all of the comments and change requests that were proposed in the initial review. ___ You have a misconfigured ~/.hgrc file (i.e. username, email etc) ___ Your computer have a badly configured date and time; confusing the the threaded patch review. ___ Your changes affect IPC mechanism, and you don't present any results for in-service upgradability test. ___ Your changes affect user manual and documentation, your patch series do not contain the patch that updates the Doxygen manual. -- Announcing the Oxford Dictionaries API! The API offers world-renowned dictionary content that is easy and intuitive to access. Sign up for an account today to start using our lexical data to power your apps and projects. 
Get started today and enter our developer competition. http://sdm.link/oxford ___ Opensaf-devel mailing list Opensaf-d
Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]
On 08-Mar-17 9:11 AM, minh chau wrote: > Hi Praveen, > > I don't think we need both patches, one of those is enough to fix the > problem of comp f/o in case unassigned component. When we have both > patches, V2 patch will make reinstantiation of failed-unassigned comp > after assignment's removal, so V1 is not needed anymore because su > operational message (enabled) will always be sent after switchover. > I am not 100% sure how is the impact of moving reinstantiation of > component after SI assignment's removal, but basically this change of > behavior is exposed to applications [Praveen] I have checked the comment in the ticket #2233 now which contains the problem description in SC absence case. I think V2 patch will not allow two su_oper message as recovery can be done only after first controller comes up. So I prefer V2 as a solution. With v2 comp instantiation is being done after completion of recovery for both assigned and unassigned components. However, when comp-failover recovery is implemented in spec compliant way for N-Way and N-Way active model, then surely we need to instantiate component as early as possible. > One potential impact I can think of, in either headless or normal > cluster, is that failed component will have less time for its > instantiation before receiving csi assignment (since reinstantiation of > failed component has been started regardless SI switchover), so it could > be a timing issue for application due to application's specific > dependencies in instantiation phase. [Praveen] This I did not get fully. But if instantiationlevel is configured for components in su, then instantiation of failed component of any level will not lead to instantiation of components of other levels: from spec :"The instantiation level is, above all, a means to limit the load on the system during the instantiation process." > > Thanks, > Minh > > On 07/03/17 16:34, praveen malviya wrote: >> Hi Minh, >> >> Is there any harm if both the patches are merged? 
One patch adds >> strict checks for message ordering in case of comp-failover recovery >> of assigned or non-assigned component. Another patch ensures that if >> an assigned or non-assigned comp faults with comp-faiover recovery >> then first AMF will switchover whole SU (current implementation >> irrespective of red models) and after completion of switchover >> re-instantiation of failed comp will be attempted. >> Also, I think, from headless perspective, the strict check of patch V1 >> is important when comp-failover occurs in the absence of SCs. >> So I have a minor query here: Is there any impact of late >> instantiation of comp when comp-failover occurs in SCs Absence? >> >> >> Also I think now an enhancement ticket should be raised for >> implementation of comp-failover recovery as per spec for N-Way and >> N-Way active model. >> >> >> Thanks, >> Praveen >> >> >> >> On 07-Mar-17 4:10 AM, minh chau wrote: >>> Hi Praveen, >>> >>> Please see comments with [Minh5] >>> >>> Thanks, >>> Minh >>> >>> On 06/03/17 17:52, praveen malviya wrote: >>>> Hi Minh, >>>> >>>> Please see inline with [Praveen]. >>>> >>>> Thanks, >>>> Praveen >>>> >>>> On 03-Mar-17 5:39 PM, minh chau wrote: >>>>> Hi Praveen, >>>>> >>>>> I have two comments with [Minh4]. >>>>> >>>>> Thanks >>>>> Minh >>>>> >>>>> On 02/03/17 20:49, praveen malviya wrote: >>>>>> Hi Minh, >>>>>> Please see response with [Praveen]. >>>>>> >>>>>> Thanks, >>>>>> Praveen >>>>>> >>>>>> >>>>>> >>>>>> On 02-Mar-17 1:43 PM, minh chau wrote: >>>>>>> Hi, >>>>>>> >>>>>>> Thanks Gary. >>>>>>> @Nagu, Praveen: Have you had time to check the example in my >>>>>>> previous >>>>>>> email? >>>>>>> The ticket #2179 is about to document that full escalation is >>>>>>> supported >>>>>>> for SC absence feature, it is waiting for fix of #2233. >>>>>>> I think there's not big change in code for #2233, it's a matter of >>>>>>> decision to make for re-instantiation of failed component. >>>>>>> >>>>>>> Thanks, >>>>>>> Minh >
Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]
Hi Minh, Is there any harm if both the patches are merged? One patch adds strict checks for message ordering in case of comp-failover recovery of assigned or non-assigned component. Another patch ensures that if an assigned or non-assigned comp faults with comp-faiover recovery then first AMF will switchover whole SU (current implementation irrespective of red models) and after completion of switchover re-instantiation of failed comp will be attempted. Also, I think, from headless perspective, the strict check of patch V1 is important when comp-failover occurs in the absence of SCs. So I have a minor query here: Is there any impact of late instantiation of comp when comp-failover occurs in SCs Absence? Also I think now an enhancement ticket should be raised for implementation of comp-failover recovery as per spec for N-Way and N-Way active model. Thanks, Praveen On 07-Mar-17 4:10 AM, minh chau wrote: > Hi Praveen, > > Please see comments with [Minh5] > > Thanks, > Minh > > On 06/03/17 17:52, praveen malviya wrote: >> Hi Minh, >> >> Please see inline with [Praveen]. >> >> Thanks, >> Praveen >> >> On 03-Mar-17 5:39 PM, minh chau wrote: >>> Hi Praveen, >>> >>> I have two comments with [Minh4]. >>> >>> Thanks >>> Minh >>> >>> On 02/03/17 20:49, praveen malviya wrote: >>>> Hi Minh, >>>> Please see response with [Praveen]. >>>> >>>> Thanks, >>>> Praveen >>>> >>>> >>>> >>>> On 02-Mar-17 1:43 PM, minh chau wrote: >>>>> Hi, >>>>> >>>>> Thanks Gary. >>>>> @Nagu, Praveen: Have you had time to check the example in my previous >>>>> email? >>>>> The ticket #2179 is about to document that full escalation is >>>>> supported >>>>> for SC absence feature, it is waiting for fix of #2233. >>>>> I think there's not big change in code for #2233, it's a matter of >>>>> decision to make for re-instantiation of failed component. 
>>>>> >>>>> Thanks, >>>>> Minh >>>>> >>>>> On 01/03/17 15:42, Gary Lee wrote: >>>>>> Hi >>>>>> >>>>>> It seems the component should be re-instantiated if it has no CSI. >>>>>> Whether or not there is an SI assigned should be irrelevant? >>>>>> >>>>>> Thanks >>>>>> Gary >>>>>> >>>>>> -Original Message- >>>>>> From: minh chau >>>>>> Date: Thursday, 23 February 2017 at 3:16 pm >>>>>> To: Nagendra Kumar , Praveen Malviya >>>>>> >>>>>> Cc: , gary , >>>>>> , >>>>>> >>>>>> Subject: Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational >>>>>> message synchronizes with component failover sequence [#2233] >>>>>> >>>>>> Hi Nagu, Praveen, >>>>>> Please find my comment in [Minh3] >>>>>> Thanks, >>>>>> Minh >>>>>> On 22/02/17 19:34, Nagendra Kumar wrote: >>>>>> >>> Since in spec there is no specific discussion for >>>>>> comp-failover recovery for an unassigned comp, I will encourage other >>>>>> maintainers also to provide inputs. >>>>>> > I do agree for not instantiating failed component before >>>>>> recovery, this keeps the approach similar to SU failover also. >>>>>> [Minh3]: There's one example of component failover that I would >>>>>> like us >>>>>> to have a look >>>>>> - 2N application, SU4/SU5 has active/standby assignment >>>>>> respectively, >>>>>> each SU has 3 components >>>>>> - Add a sleep of 10 seconds in clc script start command of first >>>>>> component C41 of SU4 >>>>>> Steps: >>>>>> 1- Kill C41 to trigger component failover >>>>>> 2- SU4 goes for quiesced assignment >>>>>> 3- SU5 goes for active assignment >>>>>> 4- SU4 is removed its assignment >>>>>> 5- Now there's a pause of 10 seconds due to clc script start, to >>>>>> ensure >>>>>> that C41 is healthy
Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]
Hi Minh, Please see inline with [Praveen]. Thanks, Praveen On 03-Mar-17 5:39 PM, minh chau wrote: > Hi Praveen, > > I have two comments with [Minh4]. > > Thanks > Minh > > On 02/03/17 20:49, praveen malviya wrote: >> Hi Minh, >> Please see response with [Praveen]. >> >> Thanks, >> Praveen >> >> >> >> On 02-Mar-17 1:43 PM, minh chau wrote: >>> Hi, >>> >>> Thanks Gary. >>> @Nagu, Praveen: Have you had time to check the example in my previous >>> email? >>> The ticket #2179 is about to document that full escalation is supported >>> for SC absence feature, it is waiting for fix of #2233. >>> I think there's not big change in code for #2233, it's a matter of >>> decision to make for re-instantiation of failed component. >>> >>> Thanks, >>> Minh >>> >>> On 01/03/17 15:42, Gary Lee wrote: >>>> Hi >>>> >>>> It seems the component should be re-instantiated if it has no CSI. >>>> Whether or not there is an SI assigned should be irrelevant? >>>> >>>> Thanks >>>> Gary >>>> >>>> -Original Message- >>>> From: minh chau >>>> Date: Thursday, 23 February 2017 at 3:16 pm >>>> To: Nagendra Kumar , Praveen Malviya >>>> >>>> Cc: , gary , >>>> , >>>> Subject: Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational >>>> message synchronizes with component failover sequence [#2233] >>>> >>>> Hi Nagu, Praveen, >>>> Please find my comment in [Minh3] >>>> Thanks, >>>> Minh >>>> On 22/02/17 19:34, Nagendra Kumar wrote: >>>> >>> Since in spec there is no specific discussion for >>>> comp-failover recovery for an unassigned comp, I will encourage other >>>> maintainers also to provide inputs. >>>> > I do agree for not instantiating failed component before >>>> recovery, this keeps the approach similar to SU failover also. 
>>>> [Minh3]: There's one example of component failover that I would >>>> like us >>>> to have a look >>>> - 2N application, SU4/SU5 has active/standby assignment >>>> respectively, >>>> each SU has 3 components >>>> - Add a sleep of 10 seconds in clc script start command of first >>>> component C41 of SU4 >>>> Steps: >>>> 1- Kill C41 to trigger component failover >>>> 2- SU4 goes for quiesced assignment >>>> 3- SU5 goes for active assignment >>>> 4- SU4 is removed its assignment >>>> 5- Now there's a pause of 10 seconds due to clc script start, to >>>> ensure >>>> that C41 is healthy >>>> 6- Next SU4 has standby assignment. >>>>From the above example, I think we can see some problems if >>>> the >>>> re-instantiation of C41 is delayed: >>>> - Because C41 is faulty, it needs to be restarted ok because its >>>> SU has >>>> assignment >>>> - Moving re-instantiation of C41 is further down that means the >>>> recovery >>>> will take longer >>>> - What if re-instantiation of C41 leads to instantation-failed >> [Praveen] If AMFND re-instantiate C41 after removal of assignment and >> it moves to instantiation-failed then: >> -Node will be rebooted if nodefailfastonterminationfaioure=true. >> -ifnodefailfastonterminationfaioure=false then as per section 4.6 page >> 212, SU will be marked INST_FAILED and AMF will have to terminate all >> the components. Termination of other components will be easier if they >> do not have assignments or pending assignments. >> >> If C41 is instantiated before removal of assignments and it moves to >> INST_FAILED state, then AMFND will be terminating other comps of SU >> when they are in the middle of quiesced or removal of assignment. So a >> component will having different orders of quiesced/removal/terminate >> callbacks in its mailbox. This will make thing complex. > [Minh4]: I am not sure if I understand the complex thing you mentioned > as it has been working like this for long time. 
If we are going to > change the current behavior to the way that amfnd will instantiate > failed component after removal assignment, then I think it should be
Re: [devel] [PATCH 1 of 1] amf: support restrictions to auto-repair [#2144]
Hi, It is my mistake. I missed the following two changes while combining the patches: a) 546c544 < + if (!comp->su->suMaintenanceCampaign.empty()) { --- > + if (!comp->su->suMaintenanceCampaign.empty() && !comp->admin_oper) { b) 275,276d274 < + continue; < + } I think the test is failing because of change a). I think case b) may not be hit in any campaign. Thanks, Praveen On 03-Mar-17 7:52 PM, Alex Jones wrote: > Hi Neel, > > You are missing two patches. I've attached the final AMF patch, which > incorporates my original and Praveen's improvements. This is what I will > push if you are OK with it. > > Alex > > On 03/03/2017 05:19 AM, Neelakanta Reddy wrote: >> >> NOTICE: This email was received from an EXTERNAL sender >> >> >> Attaching the patches used. >> >> Thanks, >> Neel. >> On 2017/03/03 03:15 PM, Neelakanta Reddy wrote: >>> Hi Alex, >>> >>> The included patches are: latest #2144 patch provided by praveen with >>> latest #2145 patch. >>> >>> The Rolling upgrade campaign to change the application version is failing. >>> This is basic application upgrade test. 
>>> >>> # smf-state camp >>> safSmfCampaign=Campaign1,safApp=safSmfService >>> state=ERROR_DETECTED(7) >>> error='safSu=dummy_2n_1,safSg=SG_dummy_2n,safApp=2nApp failed >>> after upgrade' >>> >>> syslog: >>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO PROC: Procedure init actions >>> completed >>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO PROC: Start executing the steps >>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO STEP: Executing AU restart >>> step >>> >> safSmfStep=0001,safSmfProc=amfClusterProc-1,safSmfCampaign=Campaign1,safApp=safSmfService >>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO STEP: Online installation of >>> new software >>> Mar 27 08:10:57 SLES1 osafsmfnd[29845]: NO Successful start of command >>> execution: /hostfs/online_install.sh bundle-new, timeout 8 >>> Mar 27 08:10:57 SLES1 osafsmfnd[29845]: NO Command execution OK >>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO STEP: Create new >>> SaAmfNodeSwBundle objects >>> Mar 27 08:10:57 SLES1 osafimmnd[29768]: NO Ccb 54 COMMITTED (SMFSERVICE) >>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO STEP: Modify information model >>> and set maintenance status >>> Mar 27 08:10:57 SLES1 osafimmnd[29768]: NO Ccb 55 COMMITTED (SMFSERVICE) >>> Mar 27 08:10:57 SLES1 osafamfnd[29829]: NO saAmfCompType changed to >>> 'safVersion=6.0.0,safCompType=Comp_2nApp_2n_1_1' for >>> 'safComp=Norm1,safSu=dummy_2n_1,safSg=SG_dummy_2n,safApp=2nApp' >>> Mar 27 08:10:57 SLES1 osafimmnd[29768]: NO Ccb 56 COMMITTED (SMFSERVICE) >>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: NO STEP: Restart activation units >>> Mar 27 08:10:57 SLES1 osafamfnd[29829]: NO Admin restart requested for >>> 'safComp=Norm1,safSu=dummy_2n_1,safSg=SG_dummy_2n,safApp=2nApp' >>> Mar 27 08:10:57 SLES1 osafamfnd[29829]: NO not restarting comp because >>> maintenance campaign is set: safSmfCampaign=Campaign1,safApp=safSmfService >>> Mar 27 08:10:57 SLES1 osafsmfd[29847]: ER SU: >>> safSu=dummy_2n_1,safSg=SG_dummy_2n,safApp=2nApp failed after upgrade in >>> campaign >>> >>> 
Thanks, >>> Neel. >>> >>> >>> >>> On 2017/03/03 02:55 AM, Alex Jones wrote: >>>> Hi Praveen, >>>> >>>> Both patches look fine except for one issue in the first patch >>>> (02_2144.patch). See the comment below. >>>> >>>> Neel, do you have any comments for the SMF patch? >>>> >>>> If both of you guys are OK, then I will push the AMF (my original and >>>> Praveen's 2 later ones) and SMF patches tomorrow. >>>> >>>> Alex >>>> >>>> diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc >>>> --- a/src/amf/amfd/sgproc.cc >>>> +++ b/src/amf/amfd/sgproc.cc >>>> @@ -2092,13 +2092,17 @@ void avd_node_down_mw_susi_failover(AVD_ >>>> * one loop as more than one MW SU per SG in one node is not supported. >>>> */ >>>> osafassert(avnd->list_of_ncs_su.empty() != true); >>>> - >>>> + bool campaign_set = avnd->is_campaign_set_for_all_sus();
[devel] [PATCH 0 of 1] Review Request for clmd: try to re-read node config from IMM if BAD_HANDLE is returned [#2325].
Summary: clmd: try to re-read node config from IMM if BAD_HANDLE is returned [#2325]. Review request for Trac Ticket(s): #2325 Peer Reviewer(s): Anders Pull request to: <> Affected branch(es): ALL Development branch: <> Impacted area Impact y/n Docs n Build system n RPM/packaging n Configuration files n Startup scripts n SAF services y OpenSAF services n Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): - changeset e3d19c3a42f74d766c8812c4e3713be38c32cee6 Author: Praveen Malviya Date: Fri, 03 Mar 2017 15:35:11 +0530 clmd: try to re-read node config from IMM if BAD_HANDLE is returned [#2325]. While coming up as standby, CLMD successfully initializes with IMM. It successfully reads cluster related configuration. While reading node related configuration from IMM, CLMD makes a call to saImmOmSearchNext_2(). This API could not send any message to IMMND and failed with BAD_HANDLE. Patch tries to reinitialize with IMM for finite attempts in BAD_HANDLE and TIMEOUT cases and re-reads node config. Complete diffstat: -- src/clm/clmd/clms_imm.c | 97 +- 1 files changed, 73 insertions(+), 24 deletions(-) Testing Commands: - Restarted standby controller 20 times. Testing, Expected Results: -- Issue not seen. Conditions of Submission: - Ack from reviewer. Arch Built Started Linux distro --- mips n n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. 
___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multiple times without a clear indication of what has changed between each re-send. ___ You have failed to adequately and individually address all of the comments and change requests that were proposed in the initial review. ___ You have a misconfigured ~/.hgrc file (i.e. username, email etc) ___ Your computer have a badly configured date and time; confusing the the threaded patch review. ___ Your changes affect IPC mechanism, and you don't present any results for in-service upgradability test. ___ Your changes affect user manual and documentation, your patch series do not contain the patch that updates the Doxygen manual. -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! 
http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
[devel] [PATCH 1 of 1] clmd: try to re-read node config from IMM if BAD_HANDLE is returned [#2325]
src/clm/clmd/clms_imm.c | 97 1 files changed, 73 insertions(+), 24 deletions(-) While coming up as standby, CLMD successfully initializes with IMM. It successfully reads cluster related configuration. While reading node related configuration from IMM, CLMD makes a call to saImmOmSearchNext_2(). This API could not send any message to IMMND and failed with BAD_HANDLE. Patch tries to reinitialize with IMM for finite attempts in BAD_HANDLE and TIMEOUT cases and re-reads node config. diff --git a/src/clm/clmd/clms_imm.c b/src/clm/clmd/clms_imm.c --- a/src/clm/clmd/clms_imm.c +++ b/src/clm/clmd/clms_imm.c @@ -273,7 +273,7 @@ CLMS_CLUSTER_NODE *clms_node_new(SaNameT SaAisErrorT clms_node_create_config(void) { SaAisErrorT rc = SA_AIS_ERR_INVALID_PARAM; - uint32_t rt; + uint32_t rt, num_nodes = 0; SaImmHandleT imm_om_hdl; SaImmSearchHandleT search_hdl; SaImmSearchParametersT_2 searchParam; @@ -292,36 +292,85 @@ SaAisErrorT clms_node_create_config(void searchParam.searchOneAttr.attrValueType = SA_IMM_ATTR_SASTRINGT; searchParam.searchOneAttr.attrValue = &className; - rc = immutil_saImmOmSearchInitialize_2(imm_om_hdl, NULL, SA_IMM_SUBTREE, + (void) immutil_saImmOmSearchInitialize_2(imm_om_hdl, NULL, SA_IMM_SUBTREE, SA_IMM_SEARCH_ONE_ATTR | SA_IMM_SEARCH_GET_ALL_ATTR, &searchParam, NULL, &search_hdl); - - if (rc != SA_AIS_OK) { - LOG_ER("No Object of SaClmNode Class was found"); - goto done1; + //TODO: Read saClmClusterNumNodes attribute of SaClmCluster and verify here. + //Nodes read below >= saClmClusterNumNodes (dynamic addition may add more nodes). + for (;;) { + rc = immutil_saImmOmSearchNext_2(search_hdl, &dn, &attributes); + TRACE("saImmOmSearchNext_2() returns '%s'", saf_error(rc)); + if (rc == SA_AIS_ERR_NOT_EXIST) { + //No more nodes to read. 
+ rc = SA_AIS_OK; + break; + } else if (rc == SA_AIS_OK) { + TRACE("dn:'%s'", dn.value); + if ((rt = clms_node_dn_chk(&dn)) != NCSCC_RC_SUCCESS) { + TRACE("Node DN name is incorrect"); + rc = SA_AIS_ERR_BAD_OPERATION; + goto done2; + } + if (clms_node_get_by_name(&dn) != NULL) { + TRACE("'%s' is already present in db.", dn.value); + continue; + } + if ((node = clms_node_new(&dn, (const SaImmAttrValuesT_2 **)attributes)) == NULL) + goto done2; + num_nodes++; + clms_node_add_to_model(node); + } else if ((rc == SA_AIS_ERR_BAD_HANDLE) || (rc == SA_AIS_ERR_TIMEOUT)) { + (void)immutil_saImmOmSearchFinalize(search_hdl); + (void)immutil_saImmOmFinalize(imm_om_hdl); + + //Try 10 times in a gap of 100 millisecs. + uint16_t count = 0; + rc = immutil_saImmOmInitialize(&imm_om_hdl, NULL, &immVersion); + while ((rc == SA_AIS_ERR_TIMEOUT) && (count < 10)) { + (void)immutil_saImmOmFinalize(imm_om_hdl); + count++; + osaf_nanosleep(&kHundredMilliseconds); + rc = immutil_saImmOmInitialize(&imm_om_hdl, NULL, &immVersion); + } + if (rc != SA_AIS_OK) { + LOG_ER("saImmOmInitialize failed with '%u'", rc); + goto done2; + } + + count = 0; + rc = immutil_saImmOmSearchInitialize_2(imm_om_hdl, NULL, SA_IMM_SUBTREE, + SA_IMM_SEARCH_ONE_ATTR | SA_IMM_SEARCH_GET_ALL_ATTR, &searchParam, + NULL, &search_hdl); + while (((rc == SA_AIS_ERR_TIMEOUT) || (rc == SA_AIS_ERR_BAD_HANDLE)) + && (count < 10)) { + (void)immutil_saImmOmSearchFinalize(search_hdl); + (void)immutil_saImmOmFinalize(imm_om_hdl); + + count++; + osaf_nanosleep(&kHundredMilliseconds); + + //Last try to both. + (void) immutil_saImmOmInitialize(&imm_om_hdl, NULL, &immVersion); + rc = immutil_saImmOmSearch
Re: [devel] [PATCH 1 of 1] osaf:fm on new-Active handling amfd up event of peer old-Active node which is going down[#2151] V2
Ack, code review only. Thanks, Praveen. On 02-Mar-17 3:39 PM, ramesh betham wrote: > Hi, > > Correction to the patch. > > + /* Weird situation in a cluster, where the new-Active > controller node founds the peer node > + * (old-Active) is still in the progress of shutdown (i.e., > amfd/immd is still alive). > + */ > + if ((fm_cb->role == PCS_RDA_ACTIVE) && (fm_cb->csi_assigned == > false)) { > + LOG_ER("Two active controllers observed in a cluster, > newActive: %x and old-Active: %x", fm_cb->node_id, fm_cb->peer_node_id); > + opensaf_reboot(fm_cb->peer_node_id, NULL, > correction: opensaf_reboot(0, NULL, > + "Received svc up from peer node (old-active is not > fully DOWN), hence rebooting the new Active"); > + } > > Thanks, > Ramesh. > > On 3/2/2017 2:02 PM, ramesh.bet...@oracle.com wrote: >> src/fm/fmd/fm_evt.h |2 +- >> src/fm/fmd/fm_main.c | 78 ++--- >> src/fm/fmd/fm_mds.c | 181 >> -- >> 3 files changed, 155 insertions(+), 106 deletions(-) >> >> >> diff --git a/src/fm/fmd/fm_evt.h b/src/fm/fmd/fm_evt.h >> --- a/src/fm/fmd/fm_evt.h >> +++ b/src/fm/fmd/fm_evt.h >> @@ -1,6 +1,7 @@ >> /* -*- OpenSAF -*- >> * >> * (C) Copyright 2008 The OpenSAF Foundation >> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved. 
>> * >> * This program is distributed in the hope that it will be useful, but >> * WITHOUT ANY WARRANTY; without even the implied warranty of >> MERCHANTABILITY >> @@ -31,6 +32,7 @@ This file contains the main() routine fo >> #include "nid/agent/nid_api.h" >> #include "fm.h" >> #include "base/osaf_time.h" >> +#include "base/osaf_poll.h" >> >> #define FM_CLM_API_TIMEOUT 100LL >> >> @@ -71,7 +73,6 @@ void handle_mbx_event(void); >> extern uint32_t fm_amf_init(FM_AMF_CB *fm_amf_cb); >> uint32_t gl_fm_hdl; >> static NCS_SEL_OBJ usr1_sel_obj; >> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt); >> >> /** >>* USR1 signal is used when AMF wants instantiate us as a >> @@ -176,6 +177,11 @@ int main(int argc, char *argv[]) >> */ >> fm_cb->control_tipc = true; /* Default behaviour */ >> >> +fm_cb->immd_down = true; >> +fm_cb->immnd_down = true; >> +fm_cb->amfnd_down = true; >> +fm_cb->amfd_down = true; >> + >> /* Create CB handle */ >> gl_fm_hdl = ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, NCS_SERVICE_ID_GFM, >> (NCSCONTEXT)fm_cb); >> >> @@ -194,7 +200,7 @@ int main(int argc, char *argv[]) >> goto fm_init_failed; >> } >> >> -/* Attach MBX */ >> +/* Attach MBX */ >> if (m_NCS_IPC_ATTACH(&fm_cb->mbx) != NCSCC_RC_SUCCESS) { >> syslog(LOG_ERR, "m_NCS_IPC_ATTACH() failed."); >> goto fm_init_failed; >> @@ -268,7 +274,7 @@ int main(int argc, char *argv[]) >> >> /* notify the NID */ >> if (nid_started) >> -fm_nid_notify(NCSCC_RC_SUCCESS); >> +fm_nid_notify((uint32_t) NCSCC_RC_SUCCESS); >> >> while (1) { >> ret = poll(fds, nfds, -1); >> @@ -454,52 +460,6 @@ static uint32_t fm_get_args(FM_CB *fm_cb >> return NCSCC_RC_SUCCESS; >> } >> >> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt) >> -{ >> -switch (fm_mbx_evt->svc_id) { >> -case NCSMDS_SVC_ID_IMMND: >> -cb->immnd_down = true; >> -LOG_NO("IMMND down on: %x", cb->peer_node_id); >> -break; >> -case NCSMDS_SVC_ID_AVND: >> -cb->amfnd_down = true; >> -LOG_NO("AMFND down on: %x", cb->peer_node_id); >> -break; >> -case 
NCSMDS_SVC_ID_IMMD: >> -cb->immd_down = true; >> -LOG_NO("IMMD down on: %x", cb->peer_node_id); >> -break; >> -case NCSMDS_SVC_ID_AVD: >> -cb->amfd_down = true; >> -LOG_NO("AVD down on: %x", cb->peer_node_id); >> -break; >> -case NCSMDS_SVC_ID_GFM: >> -cb->fm_down = true; >> -LOG_NO("FM down on: %x", cb->peer_node_id); >> -b
Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational message synchronizes with component failover sequence [#2233]
Hi Minh, Please see response with [Praveen]. Thanks, Praveen On 02-Mar-17 1:43 PM, minh chau wrote: > Hi, > > Thanks Gary. > @Nagu, Praveen: Have you had time to check the example in my previous > email? > The ticket #2179 is about to document that full escalation is supported > for SC absence feature, it is waiting for fix of #2233. > I think there's not big change in code for #2233, it's a matter of > decision to make for re-instantiation of failed component. > > Thanks, > Minh > > On 01/03/17 15:42, Gary Lee wrote: >> Hi >> >> It seems the component should be re-instantiated if it has no CSI. >> Whether or not there is an SI assigned should be irrelevant? >> >> Thanks >> Gary >> >> -Original Message- >> From: minh chau >> Date: Thursday, 23 February 2017 at 3:16 pm >> To: Nagendra Kumar , Praveen Malviya >> >> Cc: , gary , >> , >> Subject: Re: [devel] [PATCH 1 of 1] AMFND: Ensure su operational >> message synchronizes with component failover sequence [#2233] >> >> Hi Nagu, Praveen, >> Please find my comment in [Minh3] >> Thanks, >> Minh >> On 22/02/17 19:34, Nagendra Kumar wrote: >> >>> Since in spec there is no specific discussion for >> comp-failover recovery for an unassigned comp, I will encourage other >> maintainers also to provide inputs. >> > I do agree for not instantiating failed component before >> recovery, this keeps the approach similar to SU failover also. >> [Minh3]: There's one example of component failover that I would >> like us >> to have a look >> - 2N application, SU4/SU5 has active/standby assignment >> respectively, >> each SU has 3 components >> - Add a sleep of 10 seconds in clc script start command of first >> component C41 of SU4 >> Steps: >> 1- Kill C41 to trigger component failover >> 2- SU4 goes for quiesced assignment >> 3- SU5 goes for active assignment >> 4- SU4 is removed its assignment >> 5- Now there's a pause of 10 seconds due to clc script start, to >> ensure >> that C41 is healthy >> 6- Next SU4 has standby assignment. 
>>From the above example, I think we can see some problems if >> the >> re-instantiation of C41 is delayed: >> - Because C41 is faulty, it needs to be restarted ok because its >> SU has >> assignment >> - Moving re-instantiation of C41 is further down that means the >> recovery >> will take longer >> - What if re-instantiation of C41 leads to instantation-failed [Praveen] If AMFND re-instantiate C41 after removal of assignment and it moves to instantiation-failed then: -Node will be rebooted if nodefailfastonterminationfaioure=true. -ifnodefailfastonterminationfaioure=false then as per section 4.6 page 212, SU will be marked INST_FAILED and AMF will have to terminate all the components. Termination of other components will be easier if they do not have assignments or pending assignments. If C41 is instantiated before removal of assignments and it moves to INST_FAILED state, then AMFND will be terminating other comps of SU when they are in the middle of quiesced or removal of assignment. So a component will having different orders of quiesced/removal/terminate callbacks in its mailbox. This will make thing complex. > Whether or not the C41 has assignment or is unassigned, the >> OperState/PresenceState result from re-instantiation of faulty C41 >> affects to SU4's eligibility for assignment. [Praveen] Here Su4 will get only fresh assignments after C4 gets enabled. For fresh assignments, AMF can choose any of the spare SUs available and Su4 will be chosen based on ranks. At the same time, AMF spec encourages not to choose faulty SUs soon for assignments. 
It is highlighted in SG Auto adjust feature context in section 3.6.1.2 Initiation of the Auto-Adjust Procedure for a Service Group: " However, if the completion of a recovery/repair operation has made the service group eligible for auto-adjustment (for example, if a node joins the cluster after the repair), it is not so wise to run the auto-adjust procedure for the service group involving the newly repaired service units immediately. Thus, the service group-level configuration attribute auto-adjust probation period has been introduced (actually, the saAmfSGAutoAdjustProb configuration attribute in the SaAmfSG object class, shown in Section 8.9). When a service unit becomes available for auto-adjustment after a repair/recovery operation, the service unit enter
Re: [devel] [PATCH 1 of 1] amfnd: Do not delete cbk info if csi is in assigning state while shutting down [#2316]
Ack with that minor change. Thanks Praveen On 02-Mar-17 1:37 PM, minh chau wrote: > Hi Praveen, > > Two comments in line. > > Thanks, > Minh > > On 02/03/17 18:06, praveen malviya wrote: >> Hi Minh, >> >> With the patch opensaf stop is successful but component is not issued >> remove callback. >> When comp is responding for csi assignment, AMFND is not sending >> remove callback to the component (attached amfnd traces). In >> avnd_comp_csi_assign_done(), call to avnd_su_si_oper_done() marks the >> SI REMOVED and the same function start clean up of all the component. >> Since SI is marked REMOVED, AMFND does not issue remove callback even >> though pending_removal flag is true. >> Minor correction on the patch makes it successful: >> diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc >> --- a/src/amf/amfnd/comp.cc >> +++ b/src/amf/amfnd/comp.cc >> @@ -1614,7 +1614,7 @@ uint32_t avnd_comp_csi_assign_done(AVND_ >> >> if (find_unassigned_csi_at_rank(csi->si, rank) >> != nullptr) { >> rc = assign_all_csis_at_rank(csi->si, >> rank, true); >> - } else { >> + } else if (csi->pending_removal == false) { >> /* all csis belonging to the si are >> assigned */ >> rc = avnd_su_si_oper_done(cb, >> comp->su, m_AVND_SU_IS_ALL_SI(comp->su) ? nullptr : csi->si); > [Minh]: Thanks, will add this correction to the patch and test again >> } >> >> Also, what if we move the call to the function >> avnd_comp_cbq_csi_rec_del() in the first if-else block where CSI(s) >> is/are marked REMOVED if it/they is/are in REMOVING state instead of >> second if-else block. 
> [Minh]: The change is only calling avnd_comp_cbq_csi_rec_del() if it's > not in ASSIGNING state, moving this change to first block that means > this change will affect on "if (AVSV_SUSI_ACT_DEL == > csi->single_csi_add_rem_in_si)", so it will not be equivalent to > existing logic, unless making another check of @single_csi_add_rem_in_si > in the first if-else block >> >> Thanks, >> Praveen >> >> >> On 28-Feb-17 8:19 AM, Minh Hon Chau wrote: >>> src/amf/amfnd/comp.cc | 13 ++--- >>> 1 files changed, 10 insertions(+), 3 deletions(-) >>> >>> >>> If node is shutting down, and csi is in assigning state, amfnd >>> deletes cbk info which >>> is waiting for response of pending callback. When component responds >>> and cbk is deleted, >>> amfnd can not remove csi assignment. Thus, amfnd gets stuck in >>> shutting down. >>> >>> Patch makes amfnd not delete cbk info if csi is in assigning state, >>> respectively variable >>> pending_removal is true, and keep cbk info deleted in all other cases. >>> >>> diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc >>> --- a/src/amf/amfnd/comp.cc >>> +++ b/src/amf/amfnd/comp.cc >>> @@ -1727,7 +1727,7 @@ uint32_t avnd_comp_csi_remove_done(AVND_ >>> osafassert(m_AVND_SU_IS_PREINSTANTIABLE(comp->su)); >>> >>> /* delete any pending cbk rec for csi assignment / removal */ >>> -avnd_comp_cbq_csi_rec_del(cb, comp, (csi) ? 
csi->name : ""); >>> + >>> >>> /* ok, time to reset CSi_ALL flag */ >>> if (!csi && m_AVND_COMP_IS_ALL_CSI(comp)) { >>> @@ -1755,6 +1755,7 @@ uint32_t avnd_comp_csi_remove_done(AVND_ >>> */ >>> if (csi) { >>> if (AVSV_SUSI_ACT_DEL == csi->single_csi_add_rem_in_si) { >>> +avnd_comp_cbq_csi_rec_del(cb, comp, csi->name); >>> /* csi belonging to the si are removed */ >>> rc = avnd_su_si_oper_done(cb, comp->su, csi->si); >>> >>> @@ -1762,6 +1763,11 @@ uint32_t avnd_comp_csi_remove_done(AVND_ >>> goto done; >>> } >>> else { >>> +/* Delete cbk info if csi is not ASSIGNING state, >>> @pending_removal will be true */ >>> +if (m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_ASSIGNING(csi) >>> == false) { >>> +avnd_comp_cbq_csi_rec_del(cb, comp, csi->name); >>> +} >>> + >>> for (curr_csi = (AVND_COMP_CSI_REC >>> *)m_NCS_DBLIST_FIND_LAST(&csi->si->csi_list); >>>
Re: [devel] [PATCH 1 of 1] amfd: SG admin state is not honored during node group unlock_instantiation [#2324]
Ack. Thanks, Praveen On 02-Mar-17 8:17 AM, Tai Dinh wrote: > src/amf/amfd/nodegroup.cc | 4 > 1 files changed, 4 insertions(+), 0 deletions(-) > > > During instantiation of a node group, AMF does not check the admin state of > the SG of the SUs, with the result that SUs are instantiated even if their > SG is in the LOCKED_INSTANTIATION state. > > diff --git a/src/amf/amfd/nodegroup.cc b/src/amf/amfd/nodegroup.cc > --- a/src/amf/amfd/nodegroup.cc > +++ b/src/amf/amfd/nodegroup.cc > @@ -1011,6 +1011,10 @@ static void ng_admin_unlock_inst(AVD_AMF > Also honor saAmfSURank while instantating. >*/ > AVD_SG *sg = node_su->sg_of_su; > + if (sg->saAmfSGAdminState == > SA_AMF_ADMIN_LOCKED_INSTANTIATION) { > + TRACE("SG of '%s' SU is in locked-in, skip it", > node_su->name.c_str()); > + continue; > + } > > std::set::const_iterator iter1 ; > iter1 = tmp_sg_list.find(sg->name); > -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1 of 1] amfnd: Do not delete cbk info if csi is in assigning state while shutting down [#2316]
Hi, I am reviewing this patch. Thanks Praveen On 28-Feb-17 8:19 AM, Minh Hon Chau wrote: > src/amf/amfnd/comp.cc | 13 ++--- > 1 files changed, 10 insertions(+), 3 deletions(-) > > > If node is shutting down, and csi is in assigning state, amfnd deletes cbk > info which > is waiting for response of pending callback. When component responds and cbk > is deleted, > amfnd can not remove csi assignment. Thus, amfnd gets stuck in shutting down. > > Patch makes amfnd not delete cbk info if csi is in assigning state, > respectively variable > pending_removal is true, and keep cbk info deleted in all other cases. > > diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc > --- a/src/amf/amfnd/comp.cc > +++ b/src/amf/amfnd/comp.cc > @@ -1727,7 +1727,7 @@ uint32_t avnd_comp_csi_remove_done(AVND_ > osafassert(m_AVND_SU_IS_PREINSTANTIABLE(comp->su)); > > /* delete any pending cbk rec for csi assignment / removal */ > - avnd_comp_cbq_csi_rec_del(cb, comp, (csi) ? csi->name : ""); > + > > /* ok, time to reset CSi_ALL flag */ > if (!csi && m_AVND_COMP_IS_ALL_CSI(comp)) { > @@ -1755,6 +1755,7 @@ uint32_t avnd_comp_csi_remove_done(AVND_ >*/ > if (csi) { > if (AVSV_SUSI_ACT_DEL == csi->single_csi_add_rem_in_si) { > + avnd_comp_cbq_csi_rec_del(cb, comp, csi->name); > /* csi belonging to the si are removed */ > rc = avnd_su_si_oper_done(cb, comp->su, csi->si); > > @@ -1762,6 +1763,11 @@ uint32_t avnd_comp_csi_remove_done(AVND_ > goto done; > } > else { > + /* Delete cbk info if csi is not ASSIGNING state, > @pending_removal will be true */ > + if (m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_ASSIGNING(csi) > == false) { > + avnd_comp_cbq_csi_rec_del(cb, comp, csi->name); > + } > + > for (curr_csi = (AVND_COMP_CSI_REC > *)m_NCS_DBLIST_FIND_LAST(&csi->si->csi_list); > curr_csi; > curr_csi = (AVND_COMP_CSI_REC > *)m_NCS_DBLIST_FIND_PREV(&curr_csi->si_dll_node)) { > @@ -1770,7 +1776,7 @@ uint32_t avnd_comp_csi_remove_done(AVND_ > else if > 
(m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_REMOVING(curr_csi)) > break; > else if > (m_AVND_COMP_CSI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_csi)) { > - TRACE("'%s' is getting assigned, remove > it after assignment", > + LOG_WA("'%s' is getting assigned, > remove it after assignment", > curr_csi->name.c_str()); > curr_csi->pending_removal = true; > break; > @@ -1787,7 +1793,8 @@ uint32_t avnd_comp_csi_remove_done(AVND_ > rc = avnd_su_si_oper_done(cb, comp->su, > m_AVND_SU_IS_ALL_SI(comp->su) ? > nullptr : csi->si); > } > - } else { > + } else { > + avnd_comp_cbq_csi_rec_del(cb, comp, ""); > /* Issue remove callback with TARGET_ALL for CSIs belonging to > prv rank.*/ > for (curr_csi = > m_AVND_CSI_REC_FROM_COMP_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&comp->csi_list)); > curr_csi; > -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1 of 1] amf: support restrictions to auto-repair [#2144]
Hi Alex, The change in comp.cc looks fine. I guess the other changes are related to the node reboot case in the context of a campaign which performs an upgrade by rebooting the node. In this case, since amfnd is freshly coming up, the SUs on this node will not have the sumaintenance attribute that was set before SMF ordered the node reboot (amfnd does not read SU config from IMM). If this is the case then I think this can still be achieved without updating the AMFD and AMFND MDS versions and without including it in the SU_REG message. How to do it: AMFD can still send the campaign name by calling su->set_su_maintenance_campaign() for each SU when it gets a successful response from amfnd for SU_REG and before it sends the instantiation message for any SU. By this time AMFND already has all the information about the SUs hosted on it, so it will update the su_db with the campaign name. Attached is the patch based on this idea (2144_update.patch). Thanks, Praveen On 01-Mar-17 3:01 AM, Alex Jones wrote: src/amf/amfd/mds.cc | 2 +- src/amf/amfd/mds.h | 4 ++-- src/amf/amfd/util.cc| 5 + src/amf/amfnd/avnd_mds.h| 4 ++-- src/amf/amfnd/comp.cc | 2 +- src/amf/amfnd/mds.cc| 4 ++-- src/amf/amfnd/sudb.cc | 2 ++ src/amf/common/amf_d2nmsg.h | 2 ++ src/amf/common/d2nedu.c | 6 ++ src/amf/common/d2nmsg.c | 2 ++ 10 files changed, 25 insertions(+), 8 deletions(-) This patch adds support for Section 3.11.1.4.2 of AMF B.04.01 spec: Restrictions to Auto-Repair.
diff --git a/src/amf/amfd/mds.cc b/src/amf/amfd/mds.cc --- a/src/amf/amfd/mds.cc +++ b/src/amf/amfd/mds.cc @@ -47,7 +47,7 @@ const MDS_CLIENT_MSG_FORMAT_VER avd_avnd AVSV_AVD_AVND_MSG_FMT_VER_1, AVSV_AVD_AVND_MSG_FMT_VER_2, AVSV_AVD_AVND_MSG_FMT_VER_3, AVSV_AVD_AVND_MSG_FMT_VER_4, AVSV_AVD_AVND_MSG_FMT_VER_5, AVSV_AVD_AVND_MSG_FMT_VER_6, - AVSV_AVD_AVND_MSG_FMT_VER_7 + AVSV_AVD_AVND_MSG_FMT_VER_7, AVSV_AVD_AVND_MSG_FMT_VER_8 }; const MDS_CLIENT_MSG_FORMAT_VER avd_avd_msg_fmt_map_table[] = { diff --git a/src/amf/amfd/mds.h b/src/amf/amfd/mds.h --- a/src/amf/amfd/mds.h +++ b/src/amf/amfd/mds.h @@ -33,10 +33,10 @@ /* In Service upgrade support */ #define AVD_MDS_SUB_PART_VERSION_4 4 -#define AVD_MDS_SUB_PART_VERSION 7 +#define AVD_MDS_SUB_PART_VERSION 8 #define AVD_AVND_SUBPART_VER_MIN 1 -#define AVD_AVND_SUBPART_VER_MAX 7 +#define AVD_AVND_SUBPART_VER_MAX 8 #define AVD_AVD_SUBPART_VER_MIN1 #define AVD_AVD_SUBPART_VER_MAX6 diff --git a/src/amf/amfd/util.cc b/src/amf/amfd/util.cc --- a/src/amf/amfd/util.cc +++ b/src/amf/amfd/util.cc @@ -483,6 +483,10 @@ static void reg_su_msg_init_su_info(AVD_ su_info->su_is_external = su->su_is_external; su_info->su_failover = su->saAmfSUFailover; + SaNameT suCampaign; + osaf_extended_name_alloc(su->saAmfSUMaintenanceCampaign.c_str(), &suCampaign); + su_info->suMaintenanceCampaign = suCampaign; + su_info->next = su_msg->msg_info.d2n_reg_su.su_list; su_msg->msg_info.d2n_reg_su.su_list = su_info; su_msg->msg_info.d2n_reg_su.num_su++; @@ -1671,6 +1675,7 @@ static void free_d2n_su_msg_info(AVSV_DN su_info = su_msg->msg_info.d2n_reg_su.su_list; su_msg->msg_info.d2n_reg_su.su_list = su_info->next; osaf_extended_name_free(&su_info->name); + osaf_extended_name_free(&su_info->suMaintenanceCampaign); delete su_info; } } diff --git a/src/amf/amfnd/avnd_mds.h b/src/amf/amfnd/avnd_mds.h --- a/src/amf/amfnd/avnd_mds.h +++ b/src/amf/amfnd/avnd_mds.h @@ -31,10 +31,10 @@ #define AMF_AMFND_AVND_MDS_H_ /* In Service upgrade support */ -#define 
AVND_MDS_SUB_PART_VERSION 7 +#define AVND_MDS_SUB_PART_VERSION 8 #define AVND_AVD_SUBPART_VER_MIN 1 -#define AVND_AVD_SUBPART_VER_MAX 7 +#define AVND_AVD_SUBPART_VER_MAX 8 #define AVND_AVND_SUBPART_VER_MIN 1 #define AVND_AVND_SUBPART_VER_MAX 1 diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc --- a/src/amf/amfnd/comp.cc +++ b/src/amf/amfnd/comp.cc @@ -2675,7 +2675,7 @@ uint32_t comp_restart_initiate(AVND_COMP if (NCSCC_RC_SUCCESS != rc) goto done; - if (!comp->su->suMaintenanceCampaign.empty()) { + if (!comp->su->suMaintenanceCampaign.empty() && !comp->admin_oper) { LOG_NO("not restarting comp because maintenance campaign is set: %s", comp->su->suMaintenanceCampaign.c_str()); diff --git a/src/amf/amfnd/mds.cc b/src/amf/amfnd/mds.cc --- a/src/amf/amfnd/mds.cc +++ b/src/amf/amfnd/mds.cc @@ -43,7 +43,7 @@ const MDS_CLIENT_MSG_FORMAT_VER avnd_avd AVSV_AVD_AVND_MSG_FMT_VER_1, AVSV_AVD_AVND_MSG_FMT_VER_2, AVSV_AVD_AVND_MSG_FMT_VER_3, AVSV_AVD_AVND_MSG_FMT_VER_4, AVSV_AVD_AVND_MSG_FMT_VER_4, AVSV_AVD_AVND_MSG_FMT_VER_6, - AVSV_AVD_AVND_MSG_FMT_VER_7 + AVSV_AVD_AVND_MSG_FMT_VER_7, AVSV_AVD_AVND_MSG_FMT_VER_8 }; /* messages from director */ @@ -51,7 +51,
Re: [devel] [PATCH 1 of 1] amfd: support restrictions to auto-repair [#2144]
Hi, Attached 02_2144.patch is the patch that improves this patch. With 02_2144.patch, AMF will not mark a node disabled when: -its admin state is locked-in and -sumaintenance attribute is set for all the SUs hosted on this node including MW. With this patch, SMF will not get any disabled state notification for SUs and node. SMF will have to lock-in the node and set the sumaintenance attribute of all the SUs on that node before rebooting the node for upgrade. This patch needs to be applied on top of #2144 main patch (attached 01_2144.patch). Thanks, Praveen On 28-Feb-17 2:36 AM, Alex Jones wrote: src/amf/amfd/ndproc.cc | 3 ++- src/amf/amfd/sgproc.cc | 6 -- 2 files changed, 6 insertions(+), 3 deletions(-) This patch implements section 3.11.1.4.2 of AMF spec (Restrictions to Auto-Repair). diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc --- a/src/amf/amfd/ndproc.cc +++ b/src/amf/amfd/ndproc.cc @@ -1141,7 +1141,8 @@ void avd_node_failover(AVD_AVND *node) avd_node_mark_absent(node); avd_pg_node_csi_del_all(avd_cb, node); avd_node_down_mw_susi_failover(avd_cb, node); - avd_node_down_appl_susi_failover(avd_cb, node); + if (node->saAmfNodeAdminState != SA_AMF_ADMIN_LOCKED_INSTANTIATION) + avd_node_down_appl_susi_failover(avd_cb, node); avd_node_delete_nodeid(node); TRACE_LEAVE(); } diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc --- a/src/amf/amfd/sgproc.cc +++ b/src/amf/amfd/sgproc.cc @@ -2046,11 +2046,13 @@ void avd_node_down_mw_susi_failover(AVD_ osafassert(avnd->list_of_ncs_su.empty() != true); for (const auto& i_su : avnd->list_of_ncs_su) { - i_su->set_oper_state(SA_AMF_OPERATIONAL_DISABLED); + if (avnd->saAmfNodeAdminState != SA_AMF_ADMIN_LOCKED_INSTANTIATION) { + i_su->set_oper_state(SA_AMF_OPERATIONAL_DISABLED); + i_su->disable_comps(SA_AIS_ERR_TIMEOUT); + } i_su->set_pres_state(SA_AMF_PRESENCE_UNINSTANTIATED); i_su->set_readiness_state(SA_AMF_READINESS_OUT_OF_SERVICE); i_su->complete_admin_op(SA_AIS_ERR_TIMEOUT); -
i_su->disable_comps(SA_AIS_ERR_TIMEOUT); /* Now analyze the service group for the new HA state * assignments and send the SU SI assign messages diff --git a/src/amf/amfd/node.cc b/src/amf/amfd/node.cc --- a/src/amf/amfd/node.cc +++ b/src/amf/amfd/node.cc @@ -1587,4 +1587,16 @@ void avd_node_constructor(void) avd_class_impl_set("SaAmfNode", nullptr, node_admin_op_cb, node_ccb_completed_cb, node_ccb_apply_cb); } - +bool AVD_AVND::is_campaign_set_for_all_sus() const { + if (std::all_of(list_of_ncs_su.begin(), list_of_ncs_su.end(), +[&](AVD_SU *su) -> bool {return su->saAmfSUMaintenanceCampaign.empty() == false;})) { +if (std::all_of(list_of_su.begin(), list_of_su.end(), + [&](AVD_SU *su) -> bool {return su->saAmfSUMaintenanceCampaign.empty() == false;})) { + return true; +} else { + return false; +} + } else { +return false; + } +} diff --git a/src/amf/amfd/node.h b/src/amf/amfd/node.h --- a/src/amf/amfd/node.h +++ b/src/amf/amfd/node.h @@ -143,7 +143,7 @@ class AVD_AVND { AVD_AMF_NG *admin_ng; /* points to the nodegroup on which admin operation is going on.*/ uint16_t node_up_msg_count; /* to count of node_up msg that director had received from this node */ bool reboot; - + bool is_campaign_set_for_all_sus() const; //Member functions. void node_sus_termstate_set(bool term_state) const; private: diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc --- a/src/amf/amfd/sgproc.cc +++ b/src/amf/amfd/sgproc.cc @@ -2092,13 +2092,17 @@ void avd_node_down_mw_susi_failover(AVD_ * one loop as more than one MW SU per SG in one node is not supported. 
*/ osafassert(avnd->list_of_ncs_su.empty() != true); - + bool campaign_set = avnd->is_campaign_set_for_all_sus(); for (const auto& i_su : avnd->list_of_ncs_su) { + if ((avnd->saAmfNodeAdminState != SA_AMF_ADMIN_LOCKED_INSTANTIATION) || + (campaign_set == false)) { + i_su->set_oper_state(SA_AMF_OPERATIONAL_DISABLED); + i_su->disable_comps(SA_AIS_ERR_TIMEOUT); + } i_su->set_oper_state(SA_AMF_OPERATIONAL_DISABLED); i_su->set_pres_state(SA_AMF_PRESENCE_UNINSTANTIATED); i_su->set_readiness_state(SA_AMF_READINESS_OUT_OF_SERVICE); i_su->complete_admin_op(SA_AIS_ERR_TIMEOUT); - i_su->disable_comps(SA_AIS_ERR_TIMEOUT); /* Now analyze the service group for the new HA state * assignments and send the SU SI assign messages @@ -2142,14 +2146,18 @@ void avd_node_down_appl_susi_failover(AV { TRACE_ENTER2("'