If consensus service is enabled, only perform node failover
after peer controller has self-fenced
(after 2 * FMS_TAKEOVER_REQUEST_VALID_TIME seconds).
This also means if node failover delay is set to a large value,
we do not unnecesarily wait too long before failing over assignments
previously assigned to the peer controller.
Remove unused fmd_conf_file variable.
Change some LOG_ER calls to LOG_WA.
---
src/amf/amfd/cb.h | 1 -
src/amf/amfd/clm.cc | 4 ++--
src/amf/amfd/main.cc | 1 -
src/amf/amfd/ndfsm.cc | 8 ++++----
src/amf/amfd/ndproc.cc | 19 +++++++++++++++++++
src/amf/amfd/node_state.cc | 23 ++++++++++++-----------
src/amf/amfd/node_state_machine.cc | 19 +++++++++++++++++++
src/amf/amfd/node_state_machine.h | 2 ++
src/amf/amfd/proc.h | 1 +
9 files changed, 59 insertions(+), 19 deletions(-)
diff --git a/src/amf/amfd/cb.h b/src/amf/amfd/cb.h
index 89cf15d..7ac743e 100644
--- a/src/amf/amfd/cb.h
+++ b/src/amf/amfd/cb.h
@@ -202,7 +202,6 @@ typedef struct cl_cb_tag {
AVD_TMR heartbeat_tmr; /* The timer for sending heart beats to nd. */
SaTimeT heartbeat_tmr_period;
uint32_t minimum_cluster_size;
- std::string fmd_conf_file;
uint32_t nodes_exit_cnt; /* The counter to identifies the number
of nodes that have exited the membership
diff --git a/src/amf/amfd/clm.cc b/src/amf/amfd/clm.cc
index aeae939..cfbe36a 100644
--- a/src/amf/amfd/clm.cc
+++ b/src/amf/amfd/clm.cc
@@ -203,7 +203,7 @@ static void clm_node_exit_complete(SaClmNodeIdT nodeId) {
}
if (avd_cb->failover_list.count(node->node_info.nodeId) == 0 &&
- avd_cb->node_failover_delay == 0) {
+ delay_failover(avd_cb, node->node_info.nodeId) == false) {
avd_node_failover(node);
avd_node_delete_nodeid(node);
}
@@ -322,7 +322,7 @@ static void clm_track_cb(
LOG_IN("%s: CLM node '%s' is not an AMF cluster member; MDS down
received",
__FUNCTION__, node_name.c_str());
if (avd_cb->failover_list.count(node->node_info.nodeId) == 0 &&
- avd_cb->node_failover_delay == 0) {
+ delay_failover(avd_cb, node->node_info.nodeId) == false) {
avd_node_delete_nodeid(node);
}
goto done;
diff --git a/src/amf/amfd/main.cc b/src/amf/amfd/main.cc
index e3d0957..03857a1 100644
--- a/src/amf/amfd/main.cc
+++ b/src/amf/amfd/main.cc
@@ -582,7 +582,6 @@ static uint32_t initialize(void) {
}
cb->minimum_cluster_size =
base::GetEnv("OSAF_AMF_MIN_CLUSTER_SIZE", uint32_t{2});
- cb->fmd_conf_file = base::GetEnv("FMS_CONF_FILE", "");
node_list_db = new AmfDb<uint32_t, AVD_FAIL_OVER_NODE>;
amfnd_svc_db = new std::set<uint32_t>;
diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc
index 7099196..16b2def 100644
--- a/src/amf/amfd/ndfsm.cc
+++ b/src/amf/amfd/ndfsm.cc
@@ -811,7 +811,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
std::shared_ptr<NodeStateMachine> failed_node =
cb->failover_list.at(evt->info.node_id);
failed_node->MdsDown();
- } else if (cb->node_failover_delay > 0) {
+ } else if (delay_failover(cb, evt->info.node_id) == true) {
LOG_NO("Node '%s' is down. Start failover delay timer",
node->node_name.c_str());
@@ -821,10 +821,10 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
}
if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
- if (cb->node_failover_delay == 0) {
+ check_quorum(cb);
+ if (delay_failover(cb, evt->info.node_id) == false) {
avd_node_failover(node);
}
- check_quorum(cb);
node->node_info.member = SA_FALSE;
// Update standby out of sync if standby sc goes down
if (avd_cb->node_id_avd_other == node->node_info.nodeId) {
@@ -833,7 +833,7 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
m_AVSV_SEND_CKPT_UPDT_ASYNC_UPDT(avd_cb, node,
AVSV_CKPT_AVD_NODE_CONFIG);
}
- } else if (cb->node_failover_delay == 0) {
+ } else if (delay_failover(cb, evt->info.node_id) == false) {
/* Remove dynamic info for node but keep in nodeid tree.
* Possibly used at the end of controller failover to
* to failover payload nodes.
diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc
index 5f5cbcd..0d30dfe 100644
--- a/src/amf/amfd/ndproc.cc
+++ b/src/amf/amfd/ndproc.cc
@@ -1277,6 +1277,25 @@ void avd_node_failover(AVD_AVND *node, const bool
mw_only) {
TRACE_LEAVE();
}
+bool delay_failover(const AVD_CL_CB *cb, const SaClmNodeIdT node_id) {
+ TRACE_ENTER();
+ Consensus consensus_service;
+ bool delay = false;
+
+ if (cb->node_failover_delay > 0) {
+ delay = true;
+ } else if (node_id == cb->node_id_avd_other &&
+ consensus_service.IsEnabled() == true &&
+ consensus_service.IsRemoteFencingEnabled() == false) {
+ // even though node failover delay is set to 0,
+ // the peer SC will still take some time to self-fence,
+ // we should wait FMS_TAKEOVER_REQUEST_VALID_TIME
+ delay = true;
+ }
+
+ return delay;
+}
+
void check_quorum(AVD_CL_CB *cb) {
TRACE_ENTER();
diff --git a/src/amf/amfd/node_state.cc b/src/amf/amfd/node_state.cc
index 787ddab..4446981 100644
--- a/src/amf/amfd/node_state.cc
+++ b/src/amf/amfd/node_state.cc
@@ -17,7 +17,7 @@ Start::Start(NodeStateMachine *fsm) :
}
void Start::TimerExpired() {
- LOG_ER("unexpected timer event");
+ LOG_WA("unexpected timer event");
}
void Start::MdsUp() {
@@ -62,8 +62,9 @@ Lost::Lost(NodeStateMachine *fsm) :
NodeState(fsm) {
avd_stop_tmr(fsm_->cb_, fsm_->timer_.get());
LOG_NO("Start timer for '%x'", fsm_->node_id_);
+
avd_start_tmr(fsm_->cb_, fsm_->timer_.get(),
- fsm_->cb_->node_failover_delay * SA_TIME_ONE_SECOND);
+ fsm_->FailoverDelay());
}
void Lost::TimerExpired() {
@@ -85,7 +86,7 @@ void Lost::TimerExpired() {
// wait for checkpoint to transition state
// meanwhile, restart timer in case a SC failover to this node occurs
avd_start_tmr(fsm_->cb_, fsm_->timer_.get(),
- fsm_->cb_->node_failover_delay * SA_TIME_ONE_SECOND);
+ fsm_->FailoverDelay());
}
}
@@ -98,12 +99,12 @@ void Lost::MdsUp() {
void Lost::MdsDown() {
if (fsm_->Active() == true) {
- LOG_ER("unexpected MDS down event");
+ LOG_WA("unexpected MDS down event");
}
}
void Lost::NodeUp() {
- LOG_ER("unexpected node up event");
+ LOG_WA("unexpected node up event");
}
// state 'LostFound'
@@ -149,7 +150,7 @@ void LostFound::TimerExpired() {
void LostFound::MdsUp() {
if (fsm_->Active() == true) {
- LOG_ER("unexpected MDS up event");
+ LOG_WA("unexpected MDS up event");
}
}
@@ -172,7 +173,7 @@ void LostFound::NodeUp() {
} else {
// wait for checkpoint to transition state
// we are standby and shouldn't get node up
- LOG_ER("unexpected node up event");
+ LOG_WA("unexpected node up event");
}
}
@@ -209,7 +210,7 @@ void LostRebooting::TimerExpired() {
void LostRebooting::MdsUp() {
if (fsm_->Active() == true) {
- LOG_ER("unexpected MDS up event");
+ LOG_WA("unexpected MDS up event");
}
}
@@ -234,7 +235,7 @@ void LostRebooting::MdsDown() {
}
void LostRebooting::NodeUp() {
- LOG_ER("unexpected node up event");
+ LOG_WA("unexpected node up event");
}
// state 'Failed'
@@ -245,7 +246,7 @@ Failed::Failed(NodeStateMachine *fsm) :
}
void Failed::TimerExpired() {
- LOG_ER("unexpected timer event");
+ LOG_WA("unexpected timer event");
}
void Failed::MdsUp() {
@@ -305,7 +306,7 @@ void FailedFound::TimerExpired() {
void FailedFound::MdsUp() {
if (fsm_->Active() == true) {
- LOG_ER("unexpected MDS up event");
+ LOG_WA("unexpected MDS up event");
}
}
diff --git a/src/amf/amfd/node_state_machine.cc
b/src/amf/amfd/node_state_machine.cc
index c5d86d3..4653f79 100644
--- a/src/amf/amfd/node_state_machine.cc
+++ b/src/amf/amfd/node_state_machine.cc
@@ -1,4 +1,5 @@
#include "base/logtrace.h"
+#include "osaf/consensus/consensus.h"
#include "amf/amfd/amfd.h"
#include "amf/amfd/node_state_machine.h"
@@ -93,6 +94,24 @@ uint32_t NodeStateMachine::GetState() {
return state_->GetInt();
}
+SaTimeT NodeStateMachine::FailoverDelay() const {
+ TRACE_ENTER();
+
+ SaTimeT delay;
+ if (node_id_ == cb_->node_id_avd_other) {
+ // If peer SC, it's guaranteed to fence after this amount of time
+ // (2 * FMS_TAKEOVER_REQUEST_VALID_TIME).
+ // This may be smaller than node_failover_delay.
+ Consensus consensus_service;
+ delay = 2 * consensus_service.TakeoverValidTime();
+ } else {
+ delay = cb_->node_failover_delay;
+ }
+
+ TRACE("delay is %llu", delay);
+ return delay * SA_TIME_ONE_SECOND;
+}
+
bool NodeStateMachine::Active() {
return cb_->avail_state_avd == SA_AMF_HA_ACTIVE;
}
diff --git a/src/amf/amfd/node_state_machine.h
b/src/amf/amfd/node_state_machine.h
index 3bfabd0..598642e 100644
--- a/src/amf/amfd/node_state_machine.h
+++ b/src/amf/amfd/node_state_machine.h
@@ -22,6 +22,8 @@ class NodeStateMachine {
void SetState(uint32_t state);
uint32_t GetState();
+ SaTimeT FailoverDelay() const;
+
std::shared_ptr<AVD_TMR> timer_;
std::shared_ptr<NodeState> state_;
diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h
index f1dc7ba..4052aec 100644
--- a/src/amf/amfd/proc.h
+++ b/src/amf/amfd/proc.h
@@ -96,6 +96,7 @@ void avd_process_hb_event(AVD_CL_CB *cb_now, struct AVD_EVT
*evt);
extern void avd_node_mark_absent(AVD_AVND *node);
extern void avd_tmr_snd_hb_evh(AVD_CL_CB *cb, AVD_EVT *evt);
extern void avd_node_failover(AVD_AVND *node, const bool mw_only = false);
+bool delay_failover(const AVD_CL_CB *cb, const SaClmNodeIdT node_id);
extern void check_quorum(AVD_CL_CB *cb);
extern AVD_SU *get_other_su_from_oper_list(AVD_SU *su);
extern void su_complete_admin_op(AVD_SU *su, SaAisErrorT result);
--
2.7.4
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel