In a one-step upgrade, a timeout can occur while locking the node group, which causes the upgrade to fail. With this change the admin operation is retried after a timeout: if the retried saImmOmAdminOperationInvoke_2() call then returns SA_AIS_ERR_NO_OP, the lock is considered successful. --- src/smf/smfd/SmfAdminState.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/src/smf/smfd/SmfAdminState.cc b/src/smf/smfd/SmfAdminState.cc index 90ae093c4..7b6155cd0 100755 --- a/src/smf/smfd/SmfAdminState.cc +++ b/src/smf/smfd/SmfAdminState.cc @@ -917,11 +917,13 @@ bool SmfAdminStateHandler::nodeGroupAdminOperation( SaAisErrorT oi_rc = SA_AIS_OK; SaAisErrorT imm_rc = SA_AIS_OK; errno_ = SA_AIS_OK; + SaBoolT timeout_elapsed = SA_FALSE; base::Timer adminOpTimer(smfd_cb->adminOpTimeout / kNanoMillis); while (adminOpTimer.is_timeout() == false) { TRACE("%s: saImmOmAdminOperationInvoke_2 time left = %" PRIu64, __FUNCTION__, adminOpTimer.time_left()); + timeout_elapsed = SA_FALSE; imm_rc = saImmOmAdminOperationInvoke_2(ownerHandle_, &nodeGroupName, 0, adminOp, params, &oi_rc, @@ -930,6 +932,20 @@ bool SmfAdminStateHandler::nodeGroupAdminOperation( (imm_rc == SA_AIS_OK && oi_rc == SA_AIS_ERR_TRY_AGAIN)) { base::Sleep(base::MillisToTimespec(2000)); continue; + } else if (imm_rc == SA_AIS_ERR_TIMEOUT) { + timeout_elapsed = SA_TRUE; + // Reset timeout + adminOpTimer.set_timeout_time(smfd_cb->adminOpTimeout / kNanoMillis); + continue; + } else if ((imm_rc == SA_AIS_ERR_NO_OP && + timeout_elapsed == SA_TRUE)) { + // If an admin operation is already performed SA_AIS_ERR_NO_OP + // is returned. Treat this as OK, just log it and return + // operation success + LOG_NO("Admin op [%d] on [%s], return SA_AIS_ERR_NO_OP," + "treated as OK", adminOp, nodeGroupName_s.c_str()); + method_rc = true; + break; } else if (imm_rc != SA_AIS_OK) { LOG_NO( "%s: saImmOmAdminOperationInvoke_2 Fail %s", -- 2.25.1 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel