When the PBE hangs, amfd can process the events in the following
order when a node is started, then stopped, then started again:
- clm_track_cb for the node down event
- clm_track_cb for the second node up event
- avd_mds_avnd_down_evh is called to process the amfnd down event
This causes the node to be unable to join the cluster.
---
src/amf/amfd/ndfsm.cc | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
index ee47582de..e2235b2e9 100644
--- a/src/amf/amfd/ndfsm.cc
+++ b/src/amf/amfd/ndfsm.cc
@@ -800,6 +800,23 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
daemon_exit();
}
+ if (node->node_state == AVD_AVND_STATE_ABSENT) {
+ bool after_headless = false;
+ for (const auto &i_su : node->list_of_ncs_su) {
+ if (i_su->saAmfSUOperState == SA_AMF_OPERATIONAL_ENABLED) {
+ after_headless = true;
+ break;
+ }
+ }
+ if (after_headless != true) {
+ // Ignore amfnd down event in late after clm cb node left then joined
+ // But not ignore if after headless
+ LOG_WA("Ignore '%s' amfnd down event", node->node_name.c_str());
+ TRACE_LEAVE();
+ return;
+ }
+ }
+
if (cb->failover_list.find(evt->info.node_id) != cb->failover_list.end()) {
std::shared_ptr<NodeStateMachine> failed_node =
cb->failover_list.at(evt->info.node_id);
--
2.17.1
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel