During a one-step upgrade, a timeout can occur while locking the node group, which causes the upgrade to fail. Fix this by retrying the admin operation when saImmOmAdminOperationInvoke_2() returns SA_AIS_ERR_TIMEOUT. If the return code is then SA_AIS_ERR_NO_OP, the admin operation has already been performed, so the lock is considered successful. --- src/smf/smfd/SmfAdminState.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/src/smf/smfd/SmfAdminState.cc b/src/smf/smfd/SmfAdminState.cc index 90ae093c4..958b7ae82 100755 --- a/src/smf/smfd/SmfAdminState.cc +++ b/src/smf/smfd/SmfAdminState.cc @@ -930,6 +930,17 @@ bool SmfAdminStateHandler::nodeGroupAdminOperation( (imm_rc == SA_AIS_OK && oi_rc == SA_AIS_ERR_TRY_AGAIN)) { base::Sleep(base::MillisToTimespec(2000)); continue; + } else if (imm_rc == SA_AIS_ERR_TIMEOUT) { + // Retry + continue; + } else if (imm_rc == SA_AIS_ERR_NO_OP) { + // If an admin operation is already performed SA_AIS_ERR_NO_OP + // is returned. Treat this as OK, just log it and return + // operation success + LOG_NO("Admin op [%d] on [%s], return SA_AIS_ERR_NO_OP," + "treated as OK", adminOp, nodeGroupName_s.c_str()); + method_rc = true; + goto no_op_admin; } else if (imm_rc != SA_AIS_OK) { LOG_NO( "%s: saImmOmAdminOperationInvoke_2 Fail %s", @@ -964,6 +975,7 @@ bool SmfAdminStateHandler::nodeGroupAdminOperation( nodeGroupName_s.c_str()); } +no_op_admin: if (admset_rc == true) { TRACE("%s Admin operation is done. Release ownership if nodegroup", __FUNCTION__); -- 2.25.1 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel