Hi Lennart,

Can you help check this? Thanks.

Best Regards,
Thuan

-----Original Message-----
From: thuan.tran <[email protected]> 
Sent: Tuesday, September 25, 2018 2:04 PM
To: [email protected]; [email protected]
Cc: [email protected]; thuan.tran
<[email protected]>
Subject: [PATCH 1/1] smf: campaign is executing forever until cluster reset
[#1353]

The function getNodeDestination() resets elapsedTime to zero on every call,
so the node reboot timeout in waitForNodeDestination() is never reached.
If a rebooted node never comes back, the campaign stays in the executing
state forever, until the cluster is reset.
---
 src/smf/smfd/SmfUpgradeStep.cc |  1 +
 src/smf/smfd/SmfUtils.cc       | 11 ++++-------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/smf/smfd/SmfUpgradeStep.cc b/src/smf/smfd/SmfUpgradeStep.cc
index 4c0ddd192..80da668de 100644
--- a/src/smf/smfd/SmfUpgradeStep.cc
+++ b/src/smf/smfd/SmfUpgradeStep.cc
@@ -2399,6 +2399,7 @@ bool SmfUpgradeStep::nodeReboot() {
       "SmfUpgradeStep::nodeReboot: Waiting to get node destination with
increased UP counter");
 
   while (true) {
+    elapsedTime = 0;
     for (nodeIt = rebootedNodeList.begin(); nodeIt !=
rebootedNodeList.end();) {
       if (getNodeDestination((*nodeIt).node_name, &nodeDest, &elapsedTime,
                              -1)) {
diff --git a/src/smf/smfd/SmfUtils.cc b/src/smf/smfd/SmfUtils.cc index
915c086a5..4ac5af163 100644
--- a/src/smf/smfd/SmfUtils.cc
+++ b/src/smf/smfd/SmfUtils.cc
@@ -95,9 +95,6 @@ bool getNodeDestination(const std::string &i_node,
SmfndNodeDest *o_nodeDest,
 
   TRACE("Find destination for node '%s'", i_node.c_str());
 
-  if (elapsedTime)  // Initialize elapsedTime to zero.
-    *elapsedTime = 0;
-
   /* It seems SaAmfNode objects can be stored, but the code
    * indicates that SaClmNode's are expected. Anyway an attempt
    * to go for it is probably faster that examining IMM classes @@ -133,10
+130,10 @@ bool getNodeDestination(const std::string &i_node, SmfndNodeDest
*o_nodeDest,
       }
       struct timespec time = {2 * ONE_SECOND, 0};
       osaf_nanosleep(&time);
-      timeout--;
+      timeout -= 2;
       if (elapsedTime) *elapsedTime = *elapsedTime + 2 * ONE_SECOND;
       if (maxWaitTime != -1) {
-        if (*elapsedTime >= maxWaitTime) {
+        if ((elapsedTime) && (*elapsedTime >= maxWaitTime)) {
           LOG_NO("Failed to get node dest for clm node %s",
i_node.c_str());
           return false;
         }
@@ -165,11 +162,11 @@ bool getNodeDestination(const std::string &i_node,
SmfndNodeDest *o_nodeDest,
       }
       struct timespec time = {2 * ONE_SECOND, 0};
       osaf_nanosleep(&time);
-      timeout--;
+      timeout -= 2;
       if (elapsedTime) *elapsedTime = *elapsedTime + 2 * ONE_SECOND;
 
       if (maxWaitTime != -1) {
-        if (*elapsedTime >= maxWaitTime) {
+        if ((elapsedTime) && (*elapsedTime >= maxWaitTime)) {
           LOG_NO("Failed to get node dest for clm node %s",
i_node.c_str());
           free(nodeName);
           return false;
--
2.18.0




_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to