osaf/libs/agents/saf/imma/imma_oi_api.c | 10 ++- osaf/libs/agents/saf/imma/imma_proc.c | 6 ++- osaf/libs/common/immsv/immpbe_dump.cc | 60 ++++++++++++++++++++--- osaf/libs/common/immsv/include/immpbe_dump.hh | 3 +- osaf/services/saf/immsv/immnd/ImmModel.cc | 2 +- osaf/services/saf/immsv/immnd/immnd_evt.c | 1 - osaf/services/saf/immsv/immpbed/immpbe_daemon.cc | 33 ++++++++----- 7 files changed, 86 insertions(+), 29 deletions(-)
The main cause for the unclean cutoff was that the 2PBE slave used the runtime thread for dataflow for PRTO-create, PRTO-delete and PRTA-update. The termination of PBE is triggered by a regular CCB commit, which arrives via the main/applier thread in the 2PBE slave. As soon as the PBE termination CCB has been committed in imm-ram, the IMMNDs at the SCs will order their PBEs to terminate by sending a SIGTERM to their PBE process. The signal is caught and translated to an event on a termination descriptor polled by the main/applier thread. Because the 2PBE-slave did essentially all processing of PRTO ops in the runtime thread, they would easily be interrupted and terminated from the main/applier thread, before completing their current PRT job in the runtime thread. Such interruption/termination of the process by a separate thread is very timing sensitive. This sometimes caused a discrepancy in termination between the primary PBE and the slave PBE. By transferring all the data flow for PRTO operations to the main/applier thread in the slave, any on-going PRTO job in the slave will complete before the termination job is executed by the main-thread. The spinlock mechanism added for 2PBE to synchronize the use of sqlite transactions between the slave's main/applier thread and runtime thread has been elaborated to eliminate the risk of the sqlite transaction being committed before its transaction buildup has been completed. This is needed now because the buildup of PRTO-create and PRTA-update in the slave is executed by the runtime thread, but the sqlite commit in the slave is handled by the applier/main thread. So the main thread has to wait not just for the start of the transaction, but also for the completion of the prepare/buildup. This patch also does a bit of minor cleanup in the functions touched. 
diff --git a/osaf/libs/agents/saf/imma/imma_oi_api.c b/osaf/libs/agents/saf/imma/imma_oi_api.c --- a/osaf/libs/agents/saf/imma/imma_oi_api.c +++ b/osaf/libs/agents/saf/imma/imma_oi_api.c @@ -1252,14 +1252,16 @@ SaAisErrorT saImmOiImplementerSet(SaImmO If error is returned by implementerSet then isApplier is reset to 0 below. */ cl_node->isApplier = 0x1; - } else if((strncmp(implementerName, OPENSAF_IMM_PBE_IMPL_NAME, nameLen) == 0) || - (strncmp(implementerName, OPENSAF_IMM_PBE_RT_IMPL_NAME_B, nameLen) == 0)) - { + } + + if((strncmp(implementerName, OPENSAF_IMM_PBE_IMPL_NAME, nameLen) == 0) || + (strncmp(implementerName, OPENSAF_IMM_2PBE_APPL_NAME, nameLen) == 0)) + { /* Optimistically turn on cl-node->isPbe since callbacks may arrive to the mds thread before reply on the implementerSet request arrives back here. If error is returned by implementerSet then isPbe is reset to 0 below. */ - TRACE("Special implementer %s detected and noted.", OPENSAF_IMM_PBE_IMPL_NAME); + TRACE("Special PBE implementer %s detected and noted.", implementerName); cl_node->isPbe = 0x1; } diff --git a/osaf/libs/agents/saf/imma/imma_proc.c b/osaf/libs/agents/saf/imma/imma_proc.c --- a/osaf/libs/agents/saf/imma/imma_proc.c +++ b/osaf/libs/agents/saf/imma/imma_proc.c @@ -2044,7 +2044,11 @@ static void imma_process_callback_info(I } } - if(!(cl_node->isApplier)) { + if(!(cl_node->isApplier) || (isPbeOp && cl_node->isPbe)) { + /* Appliers dont reply on completed except PBE slave replying on completed + for PRTO- delete. PRTO-delete means ccb-id is in the high rannge. + So PBE slave does NOT reply on completed for regular CCBs. + */ localEr = imma_evt_fake_evs(cb, &ccbCompletedRpl, NULL, 0, cl_node->handle, &locked, false); if (localEr != NCSCC_RC_SUCCESS) { /*Cant do anything but log error and drop this reply. 
*/ diff --git a/osaf/libs/common/immsv/immpbe_dump.cc b/osaf/libs/common/immsv/immpbe_dump.cc --- a/osaf/libs/common/immsv/immpbe_dump.cc +++ b/osaf/libs/common/immsv/immpbe_dump.cc @@ -39,6 +39,7 @@ /* Spinlock for sqlite access see pbeBeginTrans. The lock will only be aquired in pbeBeginTrans(). It is relased in either pbeCommitTrans() or pbeAbortTrans(). + PbeCommitTrans() is only accepted after pbeClosePrepareTrans() */ static volatile unsigned int sqliteTransLock=0; @@ -47,6 +48,20 @@ bool pbeTransStarted() return sqliteTransLock!=0; } +bool pbeTransIsPrepared() +{ + return sqliteTransLock==2; +} + +void pbeClosePrepareTrans() +{ + if(sqliteTransLock != 1) { + LOG_ER("pbePrepareTrans was called when sqliteTransLock(%u)!=1", + sqliteTransLock); + abort(); + } + assert((++sqliteTransLock) == 2); +} #include <sqlite3.h> #define STRINT_BSZ 32 @@ -2683,9 +2698,13 @@ SaAisErrorT pbeBeginTrans(void* db_handl LOG_ER("Sqlite db appears blocked on other transaction"); return SA_AIS_ERR_FAILED_OPERATION; } - } + } ++sqliteTransLock; /* Lock is set. */ + if(sqliteTransLock != 1) { /* i.e. not 2 or 3 */ + LOG_ER("Failure in obtaining sqliteTransLock: %u", sqliteTransLock); + return SA_AIS_ERR_FAILED_OPERATION; + } rc = sqlite3_exec(dbHandle, "BEGIN EXCLUSIVE TRANSACTION", NULL, NULL, &execErr); if(rc != SQLITE_OK) { @@ -2706,11 +2725,14 @@ SaAisErrorT pbeCommitTrans(void* db_hand time_t now = time(NULL); SaAisErrorT err = SA_AIS_OK; - if(sqliteTransLock != 1) { - LOG_ER("pbeCommitTrans was called when sqliteTransLock(%u)!=1", sqliteTransLock); + if(sqliteTransLock != 2) { + LOG_ER("pbeCommitTrans was called when sqliteTransLock(%u)!=2", sqliteTransLock); abort(); } + assert((++sqliteTransLock) == 3); + + if(ccbId) { sqlite3_stmt *stmt = preparedStmt[SQL_INS_CCB_COMMITS]; @@ -2753,6 +2775,8 @@ SaAisErrorT pbeCommitTrans(void* db_hand } done: + --sqliteTransLock; + --sqliteTransLock; --sqliteTransLock; /* Lock is released. 
*/ fsyncPbeJournalFile(); /* This should not be needed. sqlite does double fsync itself */ return err; @@ -2806,12 +2830,22 @@ void pbeAbortTrans(void* db_handle) if(sqliteTransLock == 0) { LOG_WA("pbeAbortTrans was called when sqliteTransLock==0"); - } else if(sqliteTransLock == 1) { - --sqliteTransLock; - } else { - LOG_ER("Illegal value on sqliteTransLock:%u", sqliteTransLock); - abort(); } + + switch(sqliteTransLock) { + case 3: + --sqliteTransLock; + case 2: + --sqliteTransLock; + case 1: + --sqliteTransLock; + break; + + default: + LOG_ER("Illegal value on sqliteTransLock:%u", sqliteTransLock); + abort(); + + } } SaAisErrorT getCcbOutcomeFromPbe(void* db_handle, SaUint64T ccbId, SaUint32T currentEpoch) @@ -2896,6 +2930,11 @@ bool pbeTransStarted() return false; } +bool pbeTransIsPrepared() +{ + return false; +} + void* pbeRepositoryInit(const char* filePath, bool create, std::string& localTmpFilename) { @@ -2974,6 +3013,11 @@ void pbeAbortTrans(void* db_handle) abort(); } +void pbeClosePrepareTrans() +{ + abort(); +} + void objectDeleteToPBE(std::string objectNameString, void* db_handle) { abort(); diff --git a/osaf/libs/common/immsv/include/immpbe_dump.hh b/osaf/libs/common/immsv/include/immpbe_dump.hh --- a/osaf/libs/common/immsv/include/immpbe_dump.hh +++ b/osaf/libs/common/immsv/include/immpbe_dump.hh @@ -94,7 +94,8 @@ SaAisErrorT pbeBeginTrans(void* db_handl SaAisErrorT pbeCommitTrans(void* db_handle, SaUint64T ccbId, SaUint32T epoch, SaTimeT *externCommitTime); void pbeAbortTrans(void* db_handle); - +void pbeClosePrepareTrans(); +bool pbeTransIsPrepared(); bool pbeTransStarted(); void purgeCcbCommitsFromPbe(void* sDbHandle, SaUint32T currentEpoch); diff --git a/osaf/services/saf/immsv/immnd/ImmModel.cc b/osaf/services/saf/immsv/immnd/ImmModel.cc --- a/osaf/services/saf/immsv/immnd/ImmModel.cc +++ b/osaf/services/saf/immsv/immnd/ImmModel.cc @@ -435,7 +435,7 @@ static const std::string immAttrEpoch(OP static const std::string 
immClassName(OPENSAF_IMM_CLASS_NAME); static const std::string immAttrNostFlags(OPENSAF_IMM_ATTR_NOSTD_FLAGS); static const std::string immSyncBatchSize(OPENSAF_IMM_SYNC_BATCH_SIZE); -static const std::string immPbeBSlaveName(OPENSAF_IMM_PBE_RT_IMPL_NAME_B); +static const std::string immPbeBSlaveName(OPENSAF_IMM_2PBE_APPL_NAME); static const std::string immManagementDn("safRdn=immManagement,safApp=safImmService"); static const std::string saImmRepositoryInit("saImmRepositoryInit"); diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c b/osaf/services/saf/immsv/immnd/immnd_evt.c --- a/osaf/services/saf/immsv/immnd/immnd_evt.c +++ b/osaf/services/saf/immsv/immnd/immnd_evt.c @@ -4761,7 +4761,6 @@ static void immnd_evt_proc_admop(IMMND_C return; } - TRACE_2("Send immediate reply to client"); memset(&send_evt, '\0', sizeof(IMMSV_EVT)); send_evt.type = IMMSV_EVT_TYPE_IMMA; diff --git a/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc b/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc --- a/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc +++ b/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc @@ -121,7 +121,7 @@ static SaAisErrorT sqlite_prepare_ccb(Sa /* Note: it is important that the code in this MODIFY case follow the same logic as performed by ImmModel::ccbObjectModify() We DO NOT want the PBE repository to diverge from the main memory - represenation of the immsv data. + representation of the immsv data. This is not the only way to solve this. In fact the current solution is very unoptimal since it generates possibly several sql commands for what could be one. The advantage with the current solution is that it follows @@ -185,6 +185,7 @@ static SaAisErrorT sqlite_prepare_ccb(Sa } ccbUtilOperationData = ccbUtilOperationData->next; } + pbeClosePrepareTrans(); ccb_abort: return rc; } @@ -205,7 +206,7 @@ static SaAisErrorT sqlite_prepare_ccb(Sa The entire prepare processing (start transaction and buildup) is done by the RTO thread at the slave. 
CCB is committed at primary, then in imm-ram. Finally the sqlite commit of a ccb at the slave is done - by the slave only when it receives the applier ########## + by the slave only when it receives the completed & apply callbacks in the applier thread. */ static bool pbe2_start_prepare_ccb_A_to_B(SaImmOiCcbIdT ccbId, SaUint32T numOps) @@ -235,6 +236,8 @@ static bool pbe2_start_prepare_ccb_A_to_ const SaImmAdminOperationParamsT_2 *params[] = {¶m0, ¶m1, NULL}; + osafassert(sPbe2 && !sPbe2B); /* Must be 2PBE and NOT at slave. */ + do{ rc2B = saImmOmAdminOperationInvoke_2(sOwnerHandle, &slavePbeRtObjName, 0, OPENSAF_IMM_PBE_CCB_PREPARE, params, &slavePbeRtReply, SA_TIME_ONE_SECOND * 10); @@ -273,6 +276,7 @@ static SaAisErrorT pbe2_ok_to_prepare_cc SaAisErrorT rc = SA_AIS_OK; SaUint64T numReceivedOps = 0LL; + osafassert(sPbe2 && sPbe2B); /* Must be at slave PBE. */ if(s2PbeBCcbToCompleteAtB == 0) { TRACE("First try at prepare for ccb: %llu at slave PBE", ccbId); s2PbeBCcbUtilCcbData = ccbutil_findCcbData(ccbId); @@ -337,7 +341,7 @@ static SaAisErrorT pbe2_ok_to_prepare_cc osafassert(s2PbeBCcbOpCountToExpectAtB == s2PbeBCcbOpCountNowAtB); /* Assert is redundant but keep it here just in case code is changed. - We never want to accidentally reply ok on prepare if we a re not + We never want to accidentally reply ok on prepare if we are not really ready at slave. 
*/ @@ -541,7 +545,7 @@ static void saImmOiAdminOperationCallbac } LOG_IN("Create of class %s committing with ccbId:%llx", className.c_str(), ccbId); - + pbeClosePrepareTrans(); rc = pbeCommitTrans(sDbHandle, ccbId, sEpoch, &sLastCcbCommitTime); if(rc != SA_AIS_OK) { LOG_WA("PBE failed to commit transaction %llx for class create", ccbId); @@ -699,6 +703,7 @@ static void saImmOiAdminOperationCallbac LOG_IN("Delete of class %s committing with ccbId:%llx", className.c_str(), ccbId); + pbeClosePrepareTrans(); rc = pbeCommitTrans(sDbHandle, ccbId, sEpoch, &sLastCcbCommitTime); if(rc != SA_AIS_OK) { LOG_WA("PBE failed to commit transaction (ccb:%llx) for class delete", ccbId); @@ -814,6 +819,7 @@ static void saImmOiAdminOperationCallbac purgeCcbCommitsFromPbe(sDbHandle, sEpoch); LOG_NO("Update epoch %u committing with ccbId:%llx/%llu", sEpoch, ccbId, ccbId); + pbeClosePrepareTrans(); rc = pbeCommitTrans(sDbHandle, ccbId, sEpoch, &sLastCcbCommitTime); if(rc != SA_AIS_OK) { LOG_WA("PBE failed to commit sqlite transaction for update epoch"); @@ -1183,7 +1189,7 @@ static SaAisErrorT saImmOiCcbObjectModif ccbutil_deleteCcbData(ccbutil_findCcbData(ccbId)); - + pbeClosePrepareTrans(); rc = pbeCommitTrans(sDbHandle, ccbId, sEpoch, &sLastCcbCommitTime); if(rc != SA_AIS_OK) { LOG_WA("PBE failed to commit sqlite transaction (ccb:%llx) for PRT attr update", ccbId); @@ -1273,7 +1279,7 @@ static SaAisErrorT saImmOiCcbCompletedCa } if(sPbe2) { - /* Primary PBE requests slave PBE to start preparing. If slave replies Ok + /* Primary PBE has requested slave PBE to start preparing. If slave replied Ok then slave was ready to start prepare and has started its prepare. Reply sent from slave before slave prepare was completed, so both primary and slave will do the prepare in parallell, more or less. 
@@ -1549,7 +1555,7 @@ static SaAisErrorT saImmOiCcbObjectCreat operation->param.create.className, ccbId); ccbutil_deleteCcbData(ccbutil_findCcbData(0)); - + pbeClosePrepareTrans(); rc = pbeCommitTrans(sDbHandle, ccbId, sEpoch, &sLastCcbCommitTime); if(rc != SA_AIS_OK) { LOG_WA("PBE failed to commit sqlite transaction (ccbId:%llx) for PRTO create", ccbId); @@ -1962,9 +1968,9 @@ static void *pbeRtObjThread(void*) SaAisErrorT rc; while(immOiRtSelectionObject) { - TRACE("PBE Rt Thread entering poll"); + TRACE("##@-PBE RUNTIME thread entering poll"); int ret = poll(rtfds, nrtfds, -1); - TRACE("PBE Rt Thread returned from poll ret: %d", ret); + TRACE("##@-PBE RUNTIME thread continues after poll ret: %d", ret); if (ret == -1) { if (errno == EINTR) @@ -2085,9 +2091,9 @@ void pbeDaemon(SaImmHandleT immHandle, v contains a copy of the descriptor. */ while(immOiSelectionObject) { - TRACE("PBE Daemon entering poll"); + TRACE("##@-PBE MAIN thread entering poll"); int ret = poll(fds, nfds, -1); - TRACE("PBE Daemon returned from poll ret: %d", ret); + TRACE("##@-PBE MAIN thead continues after poll ret: %d", ret); if (ret == -1) { if (errno == EINTR) continue; @@ -2099,7 +2105,8 @@ void pbeDaemon(SaImmHandleT immHandle, v if (fds[FD_IMM_PBE_TERM].revents & POLLIN) { ncs_sel_obj_rmv_ind(term_sel_obj, true, true); if (sDbHandle != NULL) { - LOG_NO("PBE received SIG_TERM, closing db handle"); + LOG_NO("IMM %s received SIG_TERM, closing db handle", + sPbe2 ? (sPbe2B?"PBE SLAVE":"PBE PRIMARY"):"PBE"); pbeRepositoryClose(sDbHandle); sDbHandle = NULL; } @@ -2148,7 +2155,7 @@ void pbeDaemon(SaImmHandleT immHandle, v } - LOG_IN("IMM PBE process EXITING..."); + LOG_IN("IMM %s process EXITING...", sPbe2 ? (sPbe2B?"PBE SLAVE":"PBE PRIMARY"):"PBE"); TRACE_LEAVE(); exit(1); } ------------------------------------------------------------------------------ Android apps run on BlackBerry 10 Introducing the new BlackBerry 10.2.1 Runtime for Android apps. 
Now with support for Jelly Bean, Bluetooth, Mapview and more. Get your Android app in front of a whole new audience. Start now. http://pubads.g.doubleclick.net/gampad/clk?id=124407151&iu=/4140/ostg.clktrk _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel