osaf/libs/agents/saf/imma/imma_oi_api.c          |  10 ++-
 osaf/libs/agents/saf/imma/imma_proc.c            |   6 ++-
 osaf/libs/common/immsv/immpbe_dump.cc            |  60 ++++++++++++++++++++---
 osaf/libs/common/immsv/include/immpbe_dump.hh    |   3 +-
 osaf/services/saf/immsv/immnd/ImmModel.cc        |   2 +-
 osaf/services/saf/immsv/immnd/immnd_evt.c        |   1 -
 osaf/services/saf/immsv/immpbed/immpbe_daemon.cc |  33 ++++++++-----
 7 files changed, 86 insertions(+), 29 deletions(-)


The main cause for the unclean cutoff was that the 2PBE slave used the
runtime thread for dataflow for PRTO-create, PRTO-delete and PRTA-update.

The termination of PBE is triggered by a regular CCB commit, which
arrives via the main/applier thread in the 2PBE slave. As soon as the
PBE termination CCB has committed in imm-ram, the IMMNDs at the SCs will
order their PBEs to terminate by sending a SIGTERM to their PBE process.
The signal is caught and translated to an event on a termination descriptor
polled by the main/applier thread.

Because the 2PBE-slave did essentially all processing of PRTO ops
in the runtime thread, they would easily be interrupted and terminated
from the main/applier thread, before completing their current PRT job
in the runtime thread.

Such interruption/termination of the process by a separate thread is
very timing sensitive. This sometimes caused a discrepancy in termination
between the primary PBE and the slave PBE.

By transferring all the data flow for PRTO operations to the main/applier
thread in the slave, any on-going PRTO job in the slave will complete before
the termination job is executed by the main-thread.

The spinlock mechanism added for 2PBE to synchronize the use of sqlite
transactions between the slave's main/applier thread and runtime thread
has been elaborated to eliminate the risk of the sqlite transaction
being committed before its transaction buildup has been completed.
This is needed now because the buildup of PRTO-create and PRTA-update
in the slave is executed by the runtime thread, but the sqlite commit in
the slave is handled by the applier/main thread. So the main thread has to
wait not just for the start of the transaction, but also for the completion
of the prepare/buildup.

This patch also does a bit of minor cleanup in the functions touched.

diff --git a/osaf/libs/agents/saf/imma/imma_oi_api.c 
b/osaf/libs/agents/saf/imma/imma_oi_api.c
--- a/osaf/libs/agents/saf/imma/imma_oi_api.c
+++ b/osaf/libs/agents/saf/imma/imma_oi_api.c
@@ -1252,14 +1252,16 @@ SaAisErrorT saImmOiImplementerSet(SaImmO
                   If error is returned by implementerSet then isApplier is 
reset to 0 below.
                 */
                cl_node->isApplier = 0x1;
-       } else if((strncmp(implementerName, OPENSAF_IMM_PBE_IMPL_NAME, nameLen) 
== 0) ||
-               (strncmp(implementerName, OPENSAF_IMM_PBE_RT_IMPL_NAME_B, 
nameLen) == 0))
-               {
+       }
+
+       if((strncmp(implementerName, OPENSAF_IMM_PBE_IMPL_NAME, nameLen) == 0) 
||
+          (strncmp(implementerName, OPENSAF_IMM_2PBE_APPL_NAME, nameLen) == 0))
+       {
                /* Optimistically turn on cl-node->isPbe since callbacks may 
arrive to the mds
                   thread before reply on the implementerSet request arrives 
back here. 
                   If error is returned by implementerSet then isPbe is reset 
to 0 below.
                 */
-               TRACE("Special implementer %s detected and noted.", 
OPENSAF_IMM_PBE_IMPL_NAME);
+               TRACE("Special PBE implementer %s detected and noted.", 
implementerName);
                cl_node->isPbe = 0x1;
        }
 
diff --git a/osaf/libs/agents/saf/imma/imma_proc.c 
b/osaf/libs/agents/saf/imma/imma_proc.c
--- a/osaf/libs/agents/saf/imma/imma_proc.c
+++ b/osaf/libs/agents/saf/imma/imma_proc.c
@@ -2044,7 +2044,11 @@ static void imma_process_callback_info(I
                                        }
                                }
 
-                               if(!(cl_node->isApplier)) {
+                               if(!(cl_node->isApplier) || (isPbeOp && 
cl_node->isPbe)) {
+                                       /* Appliers dont reply on completed 
except PBE slave replying on completed
+                                          for PRTO- delete. PRTO-delete means 
ccb-id is in the high rannge.
+                                          So PBE slave does NOT reply on 
completed for regular CCBs. 
+                                       */
                                        localEr = imma_evt_fake_evs(cb, 
&ccbCompletedRpl, NULL, 0, cl_node->handle, &locked, false);
                                        if (localEr != NCSCC_RC_SUCCESS) {
                                                /*Cant do anything but log 
error and drop this reply. */
diff --git a/osaf/libs/common/immsv/immpbe_dump.cc 
b/osaf/libs/common/immsv/immpbe_dump.cc
--- a/osaf/libs/common/immsv/immpbe_dump.cc
+++ b/osaf/libs/common/immsv/immpbe_dump.cc
@@ -39,6 +39,7 @@
 /* Spinlock for sqlite access see pbeBeginTrans.
    The lock will only be aquired in pbeBeginTrans().
    It is relased in either pbeCommitTrans() or pbeAbortTrans().
+   PbeCommitTrans() is only accepted after pbeClosePrepareTrans()
 */
 static volatile unsigned int sqliteTransLock=0;
 
@@ -47,6 +48,20 @@ bool pbeTransStarted()
        return sqliteTransLock!=0;
 }
 
+bool pbeTransIsPrepared()
+{
+       return sqliteTransLock==2;
+}
+
+void pbeClosePrepareTrans()
+{
+    if(sqliteTransLock != 1) {
+        LOG_ER("pbePrepareTrans was called when sqliteTransLock(%u)!=1",
+               sqliteTransLock);
+        abort();
+    }
+    assert((++sqliteTransLock) == 2);
+}
 
 #include <sqlite3.h> 
 #define STRINT_BSZ 32
@@ -2683,9 +2698,13 @@ SaAisErrorT pbeBeginTrans(void* db_handl
                        LOG_ER("Sqlite db appears blocked on other 
transaction");
                        return SA_AIS_ERR_FAILED_OPERATION;
                }
-       } 
+       }
 
        ++sqliteTransLock; /* Lock is set. */
+        if(sqliteTransLock != 1) { /* i.e. not 2 or 3 */
+            LOG_ER("Failure in obtaining sqliteTransLock: %u", 
sqliteTransLock);
+            return SA_AIS_ERR_FAILED_OPERATION;
+        }
 
        rc = sqlite3_exec(dbHandle, "BEGIN EXCLUSIVE TRANSACTION", NULL, NULL, 
&execErr);
        if(rc != SQLITE_OK) {
@@ -2706,11 +2725,14 @@ SaAisErrorT pbeCommitTrans(void* db_hand
        time_t now = time(NULL);
        SaAisErrorT err = SA_AIS_OK;
 
-       if(sqliteTransLock != 1) {
-               LOG_ER("pbeCommitTrans was called when sqliteTransLock(%u)!=1", 
sqliteTransLock);
+       if(sqliteTransLock != 2) {
+               LOG_ER("pbeCommitTrans was called when sqliteTransLock(%u)!=2", 
sqliteTransLock);
                abort();
        }
 
+       assert((++sqliteTransLock) == 3);
+
+
        if(ccbId) {
                sqlite3_stmt *stmt = preparedStmt[SQL_INS_CCB_COMMITS];
 
@@ -2753,6 +2775,8 @@ SaAisErrorT pbeCommitTrans(void* db_hand
        }
 
  done:
+       --sqliteTransLock; 
+       --sqliteTransLock; 
        --sqliteTransLock; /* Lock is released. */
        fsyncPbeJournalFile(); /* This should not be needed. sqlite does double 
fsync itself */
        return err;
@@ -2806,12 +2830,22 @@ void pbeAbortTrans(void* db_handle)
 
        if(sqliteTransLock == 0) {
                LOG_WA("pbeAbortTrans was called when sqliteTransLock==0");
-       } else if(sqliteTransLock == 1) {
-               --sqliteTransLock;
-       } else {
-               LOG_ER("Illegal value on sqliteTransLock:%u", sqliteTransLock);
-               abort();
        }
+
+        switch(sqliteTransLock) {
+            case 3:
+                --sqliteTransLock;
+            case 2:
+                --sqliteTransLock;
+            case 1:
+                --sqliteTransLock;
+                break;
+
+            default:
+                LOG_ER("Illegal value on sqliteTransLock:%u", sqliteTransLock);
+                abort();
+
+        }
 }
 
 SaAisErrorT getCcbOutcomeFromPbe(void* db_handle, SaUint64T ccbId, SaUint32T 
currentEpoch)
@@ -2896,6 +2930,11 @@ bool pbeTransStarted()
        return false;
 }
 
+bool pbeTransIsPrepared()
+{
+       return false;
+}
+
 
 void* pbeRepositoryInit(const char* filePath, bool create, std::string& 
localTmpFilename)
 {
@@ -2974,6 +3013,11 @@ void pbeAbortTrans(void* db_handle)
        abort();
 }
 
+void pbeClosePrepareTrans()
+{
+    abort();
+}
+
 void objectDeleteToPBE(std::string objectNameString, void* db_handle)
 {
        abort();
diff --git a/osaf/libs/common/immsv/include/immpbe_dump.hh 
b/osaf/libs/common/immsv/include/immpbe_dump.hh
--- a/osaf/libs/common/immsv/include/immpbe_dump.hh
+++ b/osaf/libs/common/immsv/include/immpbe_dump.hh
@@ -94,7 +94,8 @@ SaAisErrorT pbeBeginTrans(void* db_handl
 SaAisErrorT pbeCommitTrans(void* db_handle, SaUint64T ccbId, SaUint32T epoch,
        SaTimeT *externCommitTime);
 void pbeAbortTrans(void* db_handle);
-
+void pbeClosePrepareTrans();
+bool pbeTransIsPrepared();
 bool pbeTransStarted();
 
 void purgeCcbCommitsFromPbe(void* sDbHandle, SaUint32T currentEpoch);
diff --git a/osaf/services/saf/immsv/immnd/ImmModel.cc 
b/osaf/services/saf/immsv/immnd/ImmModel.cc
--- a/osaf/services/saf/immsv/immnd/ImmModel.cc
+++ b/osaf/services/saf/immsv/immnd/ImmModel.cc
@@ -435,7 +435,7 @@ static const std::string immAttrEpoch(OP
 static const std::string immClassName(OPENSAF_IMM_CLASS_NAME);
 static const std::string immAttrNostFlags(OPENSAF_IMM_ATTR_NOSTD_FLAGS);
 static const std::string immSyncBatchSize(OPENSAF_IMM_SYNC_BATCH_SIZE);
-static const std::string immPbeBSlaveName(OPENSAF_IMM_PBE_RT_IMPL_NAME_B);
+static const std::string immPbeBSlaveName(OPENSAF_IMM_2PBE_APPL_NAME);
 
 static const std::string 
immManagementDn("safRdn=immManagement,safApp=safImmService");
 static const std::string saImmRepositoryInit("saImmRepositoryInit");
diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c 
b/osaf/services/saf/immsv/immnd/immnd_evt.c
--- a/osaf/services/saf/immsv/immnd/immnd_evt.c
+++ b/osaf/services/saf/immsv/immnd/immnd_evt.c
@@ -4761,7 +4761,6 @@ static void immnd_evt_proc_admop(IMMND_C
                        return;
                }
 
-               TRACE_2("Send immediate reply to client");
                memset(&send_evt, '\0', sizeof(IMMSV_EVT));
                send_evt.type = IMMSV_EVT_TYPE_IMMA;
 
diff --git a/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc 
b/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc
--- a/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc
+++ b/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc
@@ -121,7 +121,7 @@ static SaAisErrorT sqlite_prepare_ccb(Sa
                                /* Note: it is important that the code in this 
MODIFY case follow
                                   the same logic as performed by 
ImmModel::ccbObjectModify()
                                   We DO NOT want the PBE repository to diverge 
from the main memory
-                                  represenation of the immsv data. 
+                                  representation of the immsv data. 
                                   This is not the only way to solve this. In 
fact the current solution is
                                   very unoptimal since it generates possibly 
several sql commands for what
                                   could be one. The advantage with the current 
solution is that it follows
@@ -185,6 +185,7 @@ static SaAisErrorT sqlite_prepare_ccb(Sa
                }
                ccbUtilOperationData = ccbUtilOperationData->next;
        }
+       pbeClosePrepareTrans();
  ccb_abort:
        return rc;
 }
@@ -205,7 +206,7 @@ static SaAisErrorT sqlite_prepare_ccb(Sa
 
    The entire prepare processing (start transaction and buildup) is done by 
the RTO thread at the slave.
    CCB is committed at primary, then in imm-ram. Finally the sqlite commit of 
a ccb at the slave is done
-   by the slave only when it receives the applier ##########
+   by the slave only when it receives the completed & apply callbacks in the 
applier thread.
 */
 
 static bool pbe2_start_prepare_ccb_A_to_B(SaImmOiCcbIdT ccbId, SaUint32T 
numOps)
@@ -235,6 +236,8 @@ static bool pbe2_start_prepare_ccb_A_to_
 
        const SaImmAdminOperationParamsT_2 *params[] = {&param0, &param1, NULL};
 
+       osafassert(sPbe2 && !sPbe2B); /* Must be 2PBE and NOT at slave. */
+
        do{
                rc2B = saImmOmAdminOperationInvoke_2(sOwnerHandle, 
&slavePbeRtObjName, 0, OPENSAF_IMM_PBE_CCB_PREPARE,
                        params, &slavePbeRtReply, SA_TIME_ONE_SECOND * 10);
@@ -273,6 +276,7 @@ static SaAisErrorT pbe2_ok_to_prepare_cc
        SaAisErrorT rc = SA_AIS_OK;
        SaUint64T numReceivedOps = 0LL;
 
+       osafassert(sPbe2 && sPbe2B); /* Must be at slave PBE. */
        if(s2PbeBCcbToCompleteAtB == 0) { 
                TRACE("First try at prepare for ccb: %llu at slave PBE", ccbId);
                s2PbeBCcbUtilCcbData = ccbutil_findCcbData(ccbId);
@@ -337,7 +341,7 @@ static SaAisErrorT pbe2_ok_to_prepare_cc
 
        osafassert(s2PbeBCcbOpCountToExpectAtB == s2PbeBCcbOpCountNowAtB);
        /* Assert is redundant but keep it here just in case code is changed.
-          We never want to accidentally reply ok on prepare if we a re not 
+          We never want to accidentally reply ok on prepare if we are not
           really ready at slave.
         */
 
@@ -541,7 +545,7 @@ static void saImmOiAdminOperationCallbac
                }
 
                LOG_IN("Create of class %s committing with ccbId:%llx", 
className.c_str(), ccbId);
-
+               pbeClosePrepareTrans();
                rc = pbeCommitTrans(sDbHandle, ccbId, sEpoch, 
&sLastCcbCommitTime);
                if(rc != SA_AIS_OK) {
                        LOG_WA("PBE failed to commit transaction %llx for class 
create", ccbId);
@@ -699,6 +703,7 @@ static void saImmOiAdminOperationCallbac
 
                LOG_IN("Delete of class %s committing with ccbId:%llx", 
className.c_str(), ccbId);
 
+               pbeClosePrepareTrans();
                rc = pbeCommitTrans(sDbHandle, ccbId, sEpoch, 
&sLastCcbCommitTime);
                if(rc != SA_AIS_OK) {
                        LOG_WA("PBE failed to commit transaction (ccb:%llx) for 
class delete", ccbId);
@@ -814,6 +819,7 @@ static void saImmOiAdminOperationCallbac
                purgeCcbCommitsFromPbe(sDbHandle, sEpoch);
                
                LOG_NO("Update epoch %u committing with ccbId:%llx/%llu", 
sEpoch, ccbId, ccbId);
+               pbeClosePrepareTrans();
                rc = pbeCommitTrans(sDbHandle, ccbId, sEpoch, 
&sLastCcbCommitTime);
                if(rc != SA_AIS_OK) {
                        LOG_WA("PBE failed to commit sqlite transaction for 
update epoch");
@@ -1183,7 +1189,7 @@ static SaAisErrorT saImmOiCcbObjectModif
 
 
                ccbutil_deleteCcbData(ccbutil_findCcbData(ccbId));
-
+               pbeClosePrepareTrans();
                rc = pbeCommitTrans(sDbHandle, ccbId, sEpoch, 
&sLastCcbCommitTime);
                if(rc != SA_AIS_OK) {
                        LOG_WA("PBE failed to commit sqlite transaction 
(ccb:%llx) for PRT attr update", ccbId);
@@ -1273,7 +1279,7 @@ static SaAisErrorT saImmOiCcbCompletedCa
        }
 
        if(sPbe2) {
-               /* Primary PBE requests slave PBE to start preparing. If slave 
replies Ok
+               /* Primary PBE has requested slave PBE to start preparing. If 
slave replied Ok
                   then slave was ready to start prepare and has started its 
prepare.
                   Reply sent from slave before slave prepare was completed, so 
both 
                   primary and slave will do the prepare in parallell, more or 
less.
@@ -1549,7 +1555,7 @@ static SaAisErrorT saImmOiCcbObjectCreat
                        operation->param.create.className, ccbId);
 
                ccbutil_deleteCcbData(ccbutil_findCcbData(0));
-
+               pbeClosePrepareTrans();
                rc = pbeCommitTrans(sDbHandle, ccbId, sEpoch, 
&sLastCcbCommitTime);
                if(rc != SA_AIS_OK) {
                        LOG_WA("PBE failed to commit sqlite transaction 
(ccbId:%llx) for PRTO create", ccbId);
@@ -1962,9 +1968,9 @@ static void *pbeRtObjThread(void*)
        SaAisErrorT rc;
 
        while(immOiRtSelectionObject) {
-               TRACE("PBE Rt Thread entering poll");
+               TRACE("##@-PBE RUNTIME thread entering poll");
                int ret = poll(rtfds, nrtfds, -1);
-               TRACE("PBE Rt Thread returned from poll ret: %d", ret);
+               TRACE("##@-PBE RUNTIME thread continues after poll ret: %d", 
ret);
 
                if (ret == -1) {
                        if (errno == EINTR)
@@ -2085,9 +2091,9 @@ void pbeDaemon(SaImmHandleT immHandle, v
           contains a copy of the descriptor. 
         */
        while(immOiSelectionObject) { 
-               TRACE("PBE Daemon entering poll");
+               TRACE("##@-PBE MAIN thread entering poll");
                int ret = poll(fds, nfds, -1);
-               TRACE("PBE Daemon returned from poll ret: %d", ret);
+               TRACE("##@-PBE MAIN thead continues after poll ret: %d", ret);
                if (ret == -1) {
                        if (errno == EINTR)
                                continue;
@@ -2099,7 +2105,8 @@ void pbeDaemon(SaImmHandleT immHandle, v
                if (fds[FD_IMM_PBE_TERM].revents & POLLIN) {
                        ncs_sel_obj_rmv_ind(term_sel_obj, true, true);
                        if (sDbHandle != NULL) {
-                               LOG_NO("PBE received SIG_TERM, closing db 
handle");
+                               LOG_NO("IMM %s received SIG_TERM, closing db 
handle", 
+                                       sPbe2 ? (sPbe2B?"PBE SLAVE":"PBE 
PRIMARY"):"PBE");
                                pbeRepositoryClose(sDbHandle);
                                sDbHandle = NULL;
                        }
@@ -2148,7 +2155,7 @@ void pbeDaemon(SaImmHandleT immHandle, v
 
        }
 
-       LOG_IN("IMM PBE process EXITING...");
+               LOG_IN("IMM %s process EXITING...", sPbe2 ? (sPbe2B?"PBE 
SLAVE":"PBE PRIMARY"):"PBE");
        TRACE_LEAVE();
        exit(1);
 }

------------------------------------------------------------------------------
Android apps run on BlackBerry 10
Introducing the new BlackBerry 10.2.1 Runtime for Android apps.
Now with support for Jelly Bean, Bluetooth, Mapview and more.
Get your Android app in front of a whole new audience.  Start now.
http://pubads.g.doubleclick.net/gampad/clk?id=124407151&iu=/4140/ostg.clktrk
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to