The 'lost' nodes will be rebooted, thus there is no need
to queue sync messages from these nodes.
In addition, node_sync_window_closed is not reliable as it's not
check pointed. We should remove all uses of it in another ticket?
Instead, check if the timer is running.
---
src/amf/amfd/cb.h | 2 ++
src/amf/amfd/ndproc.cc | 30 ++++++++++++++++++++++--------
src/amf/amfd/timer.h | 12 ++++++------
3 files changed, 30 insertions(+), 14 deletions(-)
diff --git a/src/amf/amfd/cb.h b/src/amf/amfd/cb.h
index 89cf15d..8902d78 100644
--- a/src/amf/amfd/cb.h
+++ b/src/amf/amfd/cb.h
@@ -237,6 +237,8 @@ typedef struct cl_cb_tag {
*/
bool active_services_exist;
bool all_nodes_synced;
+ // @todo this should be checkpointed to standby? otherwise
+ // after a controller failover, it will still be false?
bool node_sync_window_closed;
/*
diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc
index 5f5cbcd..20008d9 100644
--- a/src/amf/amfd/ndproc.cc
+++ b/src/amf/amfd/ndproc.cc
@@ -345,19 +345,26 @@ void avd_nd_sisu_state_info_evh(AVD_CL_CB *cb, AVD_EVT
*evt) {
evt->info.avnd_msg->msg_info.n2d_nd_sisu_state_info.node_id,
evt->info.avnd_msg->msg_info.n2d_nd_sisu_state_info.msg_id);
- if (cb->node_sync_window_closed == false) {
+ const SaClmNodeIdT node_id =
+ evt->info.avnd_msg->msg_info.n2d_nd_sisu_state_info.node_id;
+
+ if (cb->failover_list.find(node_id) != cb->failover_list.end()) {
+ // ignore msg
+ LOG_WA("sisu_state_info messages received from lost node (%x)",
+ node_id);
+ } else if (cb->node_sync_tmr.is_active == true) {
AVD_EVT_QUEUE *state_info_evt = new AVD_EVT_QUEUE();
state_info_evt->evt = new AVD_EVT{};
state_info_evt->evt->rcv_evt = evt->rcv_evt;
state_info_evt->evt->info.avnd_msg = n2d_msg;
cb->evt_queue.push(state_info_evt);
+ return;
} else {
LOG_WA(
- "Ignore this sisu_state_info message since node sync window has
closed");
- avsv_dnd_msg_free(n2d_msg);
+ "Ignore this sisu_state_info message since node sync window has closed");
}
- TRACE_LEAVE();
+ avsv_dnd_msg_free(n2d_msg);
}
/*****************************************************************************
@@ -387,19 +394,26 @@ void avd_nd_compcsi_state_info_evh(AVD_CL_CB *cb, AVD_EVT
*evt) {
evt->info.avnd_msg->msg_info.n2d_nd_csicomp_state_info.node_id,
evt->info.avnd_msg->msg_info.n2d_nd_csicomp_state_info.msg_id);
- if (cb->node_sync_window_closed == false) {
+ const SaClmNodeIdT node_id =
+ evt->info.avnd_msg->msg_info.n2d_nd_csicomp_state_info.node_id;
+
+ if (cb->failover_list.find(node_id) != cb->failover_list.end()) {
+ // ignore msg
+ LOG_WA("compcsi_state_info messages received from lost node (%x)",
+ node_id);
+ } else if (cb->node_sync_tmr.is_active == true) {
AVD_EVT_QUEUE *state_info_evt = new AVD_EVT_QUEUE();
state_info_evt->evt = new AVD_EVT{};
state_info_evt->evt->rcv_evt = evt->rcv_evt;
state_info_evt->evt->info.avnd_msg = n2d_msg;
cb->evt_queue.push(state_info_evt);
+ return;
} else {
LOG_WA(
- "Ignore this compcsi_state_info message since node sync window has
closed");
- avsv_dnd_msg_free(n2d_msg);
+ "Ignore this compcsi_state_info message since node sync window has
closed");
}
- TRACE_LEAVE();
+ avsv_dnd_msg_free(n2d_msg);
}
/**
diff --git a/src/amf/amfd/timer.h b/src/amf/amfd/timer.h
index 5316879..6db04c7 100644
--- a/src/amf/amfd/timer.h
+++ b/src/amf/amfd/timer.h
@@ -52,12 +52,12 @@ typedef enum avd_tmr_type {
/* AVD Timer definition */
typedef struct avd_tmr_tag {
- tmr_t tmr_id;
- AVD_TMR_TYPE type;
- SaClmNodeIdT node_id;
- std::string spons_si_name;
- std::string dep_si_name;
- bool is_active;
+ tmr_t tmr_id{};
+ AVD_TMR_TYPE type{AVD_TMR_MAX};
+ SaClmNodeIdT node_id{};
+ std::string spons_si_name{};
+ std::string dep_si_name{};
+ bool is_active{};
} AVD_TMR;
/* macro to start the cluster init timer. The cb structure
--
2.7.4
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel