- Incorrect counting of lost nodes causes the new coordinator to
postpone sync while waiting for a number of nodes bigger than the cluster size.
- Count lost nodes correctly by tracking a set of lost node IDs.
---
src/imm/immnd/ImmModel.cc | 14 ++++++++++++++
src/imm/immnd/immnd_evt.c | 4 ++--
src/imm/immnd/immnd_init.h | 4 ++++
3 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/src/imm/immnd/ImmModel.cc b/src/imm/immnd/ImmModel.cc
index 631597b8a..00d7f4794 100644
--- a/src/imm/immnd/ImmModel.cc
+++ b/src/imm/immnd/ImmModel.cc
@@ -524,6 +524,7 @@ typedef std::map<std::string, ObjectSet> MissingParentsMap;
// Local variables
+static std::set<SaUint32T> sDiscardNodeSet;
static ClassMap sClassMap;
static AdminOwnerVector sOwnerVector;
static CcbVector sCcbVector;
@@ -1364,12 +1365,25 @@ void immModel_getCcbIdsForOrigCon(IMMND_CB* cb,
SaUint32T deadCon,
osafassert(ix == (*arrSize));
}
+void immModel_resetDiscardNodes(IMMND_CB* cb) {
+ cb->mLostNodes = 0;
+ sDiscardNodeSet.clear();
+}
+
+void immModel_eraseDiscardNode(SaUint32T nodeId) {
+ sDiscardNodeSet.erase(nodeId);
+}
+
void immModel_discardNode(IMMND_CB* cb, SaUint32T nodeId, SaUint32T* arrSize,
SaUint32T** ccbIdArr, SaUint32T* globArrSize,
SaUint32T** globccbIdArr) {
ConnVector cv, gv;
ConnVector::iterator cvi, gvi;
unsigned int ix = 0;
+ if (sDiscardNodeSet.find(nodeId) == sDiscardNodeSet.end()) {
+ sDiscardNodeSet.insert(nodeId);
+ cb->mLostNodes++;
+ }
ImmModel::instance(&cb->immModel)
->discardNode(nodeId, cv, gv, cb->mIsCoord, false);
*arrSize = (SaUint32T)cv.size();
diff --git a/src/imm/immnd/immnd_evt.c b/src/imm/immnd/immnd_evt.c
index dfef6c0a5..af8f5876a 100644
--- a/src/imm/immnd/immnd_evt.c
+++ b/src/imm/immnd/immnd_evt.c
@@ -10321,7 +10321,7 @@ static uint32_t immnd_evt_proc_start_sync(IMMND_CB *cb,
IMMND_EVT *evt,
Nodes. This is mostly relevant for "standby"
i.e. the non-coord immnd which is on an SC.
*/
- cb->mLostNodes = 0;
+ immModel_resetDiscardNodes(cb);
}
}
immModel_prepareForSync(cb, cb->mSync);
@@ -10488,6 +10488,7 @@ static uint32_t immnd_evt_proc_sync_req(IMMND_CB *cb,
IMMND_EVT *evt,
cb->mSyncRequested = true;
if (cb->mLostNodes > 0) {
cb->mLostNodes--;
+ immModel_eraseDiscardNode(evt->info.ctrl.nodeId);
}
/*osafassert(cb->mRulingEpoch == evt->info.ctrl.rulingEpoch); */
TRACE_2("At COORD: My Ruling Epoch:%u Cenral Ruling Epoch:%u",
@@ -10989,7 +10990,6 @@ static void immnd_evt_proc_discard_node(IMMND_CB *cb,
IMMND_EVT *evt,
/* We should remember the nodeId/pid pair to avoid a redundant message
causing a newly reattached node being discarded.
*/
- cb->mLostNodes++;
immModel_discardNode(cb, evt->info.ctrl.nodeId, &arrSize, &idArr,
&globArrSize, &globIdArr);
if (globArrSize) {
diff --git a/src/imm/immnd/immnd_init.h b/src/imm/immnd/immnd_init.h
index 9a3f70072..0732f43f0 100644
--- a/src/imm/immnd/immnd_init.h
+++ b/src/imm/immnd/immnd_init.h
@@ -154,6 +154,10 @@ bool immModel_ccbAbort(IMMND_CB *cb, SaUint32T ccbId,
SaUint32T *arrSize,
void immModel_getCcbIdsForOrigCon(IMMND_CB *cb, SaUint32T origConn,
SaUint32T *arrSize, SaUint32T **ccbIdArr);
+void immModel_resetDiscardNodes(IMMND_CB* cb);
+
+void immModel_eraseDiscardNode(SaUint32T nodeId);
+
void immModel_discardNode(IMMND_CB *cb, SaUint32T nodeId, SaUint32T *arrSize,
SaUint32T **ccbIdArr, SaUint32T *globArrSize,
SaUint32T **globccbIdArr);
--
2.25.1
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel