osaf/libs/common/immsv/include/immsv_api.h | 5 +- osaf/services/saf/immsv/README | 33 +++++++++++++++++--- osaf/services/saf/immsv/immnd/ImmModel.cc | 47 +++++++++++++++++++++++------ osaf/services/saf/immsv/immnd/ImmModel.hh | 5 +- 4 files changed, 71 insertions(+), 19 deletions(-)
See the diff for osaf/services/saf/immsv/README for an explanation of this enhancement. diff --git a/osaf/libs/common/immsv/include/immsv_api.h b/osaf/libs/common/immsv/include/immsv_api.h --- a/osaf/libs/common/immsv/include/immsv_api.h +++ b/osaf/libs/common/immsv/include/immsv_api.h @@ -142,11 +142,12 @@ typedef enum { typedef enum { SA_IMM_ADMIN_EXPORT = 1, /* Defined in A.02.01 declared in A.03.01 */ - SA_IMM_ADMIN_INIT_FROM_FILE = 100 /* Non standard, force PBE disable. */ + SA_IMM_ADMIN_INIT_FROM_FILE = 100, /* Non standard, force PBE disable. */ + SA_IMM_ADMIN_ABORT_CCBS = 202 /* Non standard, abort non critical CCBs. */ } SaImmMngtAdminOperationT; /* - * Special flags only to be used by the imm-dummper, the imm-loader or + * Special flags only to be used by the imm-dumper, the imm-loader or * new API functions. * * The first excludes non persistent runtime attributes from the dump. diff --git a/osaf/services/saf/immsv/README b/osaf/services/saf/immsv/README --- a/osaf/services/saf/immsv/README +++ b/osaf/services/saf/immsv/README @@ -2302,8 +2302,8 @@ the continuation times out in the server receives an error reply when that om client has NOT also timed out. -Improve error diagnostics when PBE is misconfigured. -==================================================== +Improve error diagnostics when PBE is misconfigured (4.6) +========================================================= http://sourceforge.net/p/opensaf/tickets/1139 Configuration mistakes such as omitting to change immnd.conf to allow PBE @@ -2329,15 +2329,15 @@ Error logging been improved and the imm Ccb operation error cases. This should make troubleshooting this issue much faster and easier. 
-IMM API that replaces SaNameT with SaStringT and SA_IMM_ATTR_DN -=============================================================== +IMM API that replaces SaNameT with SaStringT and SA_IMM_ATTR_DN (4.6) +===================================================================== http://sourceforge.net/p/opensaf/tickets/643 See: osaf/services/saf/immsv/README.SASTRINGT_API for details. Notes on upgrading from OpenSAF 4.[1,2,3,4,5] to OpenSAF (4.6) -========================================================== +============================================================== OpenSAF4.6 adds new message types that avoid using the SaNameT type (#969). During a rolling upgrade from an earlier OpenSAF release to the 4.6 release there will be nodes executing the older release concurrently with nodes executing OpenSAF 4.6. @@ -2376,6 +2376,29 @@ Bit 5 controls OpenSAF4.5 protocols allo Bit 6 controls OpenSAF4.6 protocols allowed or not (normally on/1). +Provide an admin-operation for aborting all non-critical CCBs (4.7) +=================================================================== +http://sourceforge.net/p/opensaf/tickets/1107 + +There may arise situations where an open CCB that is not in critical, +i.e. has not entered the commit protocol yet, is blocking an involved +service/OI from performing some other task that is more urgent and more +important than completing that CCB. The best example is the AMF, where +an si-swap will fail and cause the standby to reboot if it was involved +in an open CCB when the si-swap order was issued (see ticket #1105). +Ticket #1105 can be fixed by the AMF (active or standby) sending an +admin-operation directed at the IMM service requesting it to abort non +critical CCBs. The AMF can either use a synchronous admin-op or an +asynchronous admin-op. After the admin-operation has been invoked the AMF +should allow a few seconds for the CCB to get aborted and the AMF OI to +get the abort callback for the CCB. 
That should then clear the path for +the AMF standby to succeed with the si-swap. +The admin-operation for aborting non critical CCBs involves requesting the +operation id '202' directed at the IMM SF service object: + + immadm -o 202 safRdn=immManagement,safApp=safImmService + + ---------------------------------------- DEPENDENCIES ============ diff --git a/osaf/services/saf/immsv/immnd/ImmModel.cc b/osaf/services/saf/immsv/immnd/ImmModel.cc --- a/osaf/services/saf/immsv/immnd/ImmModel.cc +++ b/osaf/services/saf/immsv/immnd/ImmModel.cc @@ -453,6 +453,7 @@ static SaImmRepositoryInitModeT immInitM static SaUint32T ccbIdLongDnGuard = 0; /* Disallow long DN additions if longDnsAllowed is being changed in ccb*/ static bool sIsLongDnLoaded = false; /* track long DNs before opensafImm=opensafImm,safApp=safImmService is created */ +static bool sAbortNonCriticalCcbs = false; /* Set to true at coord by the special imm admin-op to abort ccbs #1107 */ struct AttrFlagIncludes { @@ -1252,7 +1253,7 @@ immModel_adminOperationInvoke(IMMND_CB * { return ImmModel::instance(&cb->immModel)-> adminOperationInvoke(req, reqConn, reply_dest, inv, - implConn, implNodeId, pbeExpected, displayRes); + implConn, implNodeId, pbeExpected, displayRes, cb->mIsCoord); } SaUint32T /* Returns admo-id for object if object exists and active admo exists, otherwise zero. 
*/ @@ -3139,7 +3140,7 @@ ImmModel::classCreate(const ImmsvOmClass if(attr->attrValueType != SA_IMM_ATTR_SANAMET && !((attr->attrFlags & SA_IMM_ATTR_DN) && (attr->attrValueType == SA_IMM_ATTR_SASTRINGT))) { LOG_NO("ERR_INVALID_PARAM: Attribute '%s' must be of type SaNameT, " - "or of type SaStringT with DN flag", attNm); + "or of type SaStringT with DN flag", attNm); illegal = 1; } @@ -10982,7 +10983,7 @@ SaAisErrorT ImmModel::adminOperationInvo SaInvocationT& saInv, SaUint32T* implConn, unsigned int* implNodeId, - bool pbeExpected, bool* displayRes) + bool pbeExpected, bool* displayRes, bool isAtCoord) { TRACE_ENTER(); SaAisErrorT err = SA_AIS_OK; @@ -11179,7 +11180,7 @@ SaAisErrorT ImmModel::adminOperationInvo TRACE_7("Admin op on special object %s whith no implementer ret:%u", objectName.c_str(), err); } else if(objectName == immManagementDn) { - err = admoImmMngtObject(req); + err = admoImmMngtObject(req, isAtCoord); TRACE_7("Admin op on special object %s whith no implementer ret:%u", objectName.c_str(), err); } else { @@ -11772,7 +11773,7 @@ ImmModel::resourceDisplay(const struct I SaAisErrorT -ImmModel::admoImmMngtObject(const ImmsvOmAdminOperationInvoke* req) +ImmModel::admoImmMngtObject(const ImmsvOmAdminOperationInvoke* req, bool isAtCoord) { SaAisErrorT err = SA_AIS_ERR_INTERRUPT; /* Function for handling admin-ops directed at the immsv itself. @@ -11810,6 +11811,13 @@ ImmModel::admoImmMngtObject(const ImmsvO immInitMode = SA_IMM_INIT_FROM_FILE; LOG_NO("SaImmRepositoryInitModeT FORCED to: SA_IMM_INIT_FROM_FILE"); } + } else if (req->operationId == SA_IMM_ADMIN_ABORT_CCBS) { /* Non standard. 
*/ + LOG_NO("Received: immadm -o %u safRdn=immManagement,safApp=safImmService", + SA_IMM_ADMIN_ABORT_CCBS); + if(isAtCoord) { + LOG_IN("sAbortNonCriticalCcbs = true;"); + sAbortNonCriticalCcbs = true; + } } else { LOG_NO("Invalid operation ID %llu, for operation on %s", (SaUint64T) req->operationId, immManagementDn.c_str()); @@ -12476,7 +12484,7 @@ ImmModel::cleanTheBasement(InvocVector& //AND ccbIds for ccbs in critical and marked with PbeRestartedId. //Restarted PBE => try to recover outcome BEFORE timeout, making //recovery transparent to user! - //TODO the timeout should not be hardwired, but for now it is. + //Also handle the case of admin-op requesting abort of all non-critical ccbs. TRACE("Checking active ccb %u for deadlock or blocked implementer", (*i3)->mId); TRACE("state:%u waitsart:%u PberestartId:%u",(*i3)->mState, @@ -12484,9 +12492,14 @@ ImmModel::cleanTheBasement(InvocVector& CcbImplementerMap::iterator cim; uint32_t max_oi_timeout = DEFAULT_TIMEOUT_SEC; - for(cim = (*i3)->mImplementers.begin(); cim != (*i3)->mImplementers.end(); ++cim) { - if(cim->second->mImplementer->mTimeout > max_oi_timeout) { - max_oi_timeout = cim->second->mImplementer->mTimeout; + if(sAbortNonCriticalCcbs) { + LOG_IN("sAbortNonCriticalCcbs is true => set max_oi_timeout to 0"); + max_oi_timeout = 0; + } else { + for(cim = (*i3)->mImplementers.begin(); cim != (*i3)->mImplementers.end(); ++cim) { + if(cim->second->mImplementer->mTimeout > max_oi_timeout) { + max_oi_timeout = cim->second->mImplementer->mTimeout; + } } } @@ -12502,6 +12515,15 @@ ImmModel::cleanTheBasement(InvocVector& oi_timeout = 0; TRACE_5("CCB %u timeout while waiting on implementer reply", (*i3)->mId); + setCcbErrorString(*i3, "Resource Error: CCB timeout while " + "waiting on implementer reply"); + } + + if(sAbortNonCriticalCcbs) { + LOG_NO("CCB %u aborted by: immadm -o %u safRdn=immManagement,safApp=safImmService", + (*i3)->mId, SA_IMM_ADMIN_ABORT_CCBS); + setCcbErrorString(*i3, "Resource Error: CCB aborted 
by admin-operation" + " '202' on safRdn=immManagement,safApp=safImmService"); } if((*i3)->mState == IMM_CCB_CRITICAL) { @@ -12528,6 +12550,11 @@ ImmModel::cleanTheBasement(InvocVector& } } + if(sAbortNonCriticalCcbs) { + LOG_IN("sAbortNonCriticalCcbs reset to false"); + sAbortNonCriticalCcbs = false; /* Reset. */ + } + while((i3 = ccbsToGc.begin()) != ccbsToGc.end()) { CcbInfo* ccb = (*i3); ccbsToGc.erase(i3); @@ -12544,7 +12571,7 @@ ImmModel::cleanTheBasement(InvocVector& //It needs to be long to allow reply on larger batch jobs such as a //schema/class change with instance migration and slow file system. //It can not be infinite as that could cause a memory leak. - if(now - ci2->second.mCreateTime >= (DEFAULT_TIMEOUT_SEC * 20)) { + if(now - ci2->second.mCreateTime >= (DEFAULT_TIMEOUT_SEC * 20)) { TRACE_5("Timeout on PbeRtReqContinuation %llu", ci2->first); pbePrtoReqs.push_back(ci2->second.mConn); sPbeRtReqContinuationMap.erase(ci2); diff --git a/osaf/services/saf/immsv/immnd/ImmModel.hh b/osaf/services/saf/immsv/immnd/ImmModel.hh --- a/osaf/services/saf/immsv/immnd/ImmModel.hh +++ b/osaf/services/saf/immsv/immnd/ImmModel.hh @@ -361,7 +361,8 @@ public: SaUint32T* implConn, unsigned int* implNodeId, bool pbeExpected, - bool* displayRes); + bool* displayRes, + bool isAtCoord); // Objects @@ -653,7 +654,7 @@ private: std::string newClassName, bool remove=false); SaAisErrorT updateImmObject2(const ImmsvOmAdminOperationInvoke* req); - SaAisErrorT admoImmMngtObject(const ImmsvOmAdminOperationInvoke* req); + SaAisErrorT admoImmMngtObject(const ImmsvOmAdminOperationInvoke* req, bool isAtCoord); void addNoDanglingRefs(ObjectInfo *obj); void removeNoDanglingRefs( ------------------------------------------------------------------------------ One dashboard for servers and applications across Physical-Virtual-Cloud Widest out-of-the-box monitoring support with 50+ applications Performance metrics, stats and reports that give you Actionable Insights Deep dive visibility with 
transaction tracing using APM Insight. http://ad.doubleclick.net/ddm/clk/290420510;117567292;y _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel