osaf/libs/common/immsv/immpbe_dump.cc            |   9 +++++----
 osaf/services/saf/immsv/immpbed/immpbe_daemon.cc |  12 ++++++++----
 2 files changed, 13 insertions(+), 8 deletions(-)


An SMF campaign enables the PBE (with 2PBE) and immediately attempts
to update a PRTA. This fails because the slave PBE (PBE-B) has not
completed its initialization when it receives the prepare message
for the PRTA update, so the PRTA update is rejected.
It also causes the slave PBE to exit and restart, because it erroneously
tries to abort (ROLLBACK) an sqlite transaction that was never started.

Mar 31 10:33:19 SC-2-1 osafimmnd[13967]: NO ERR_BAD_OPERATION: Mismatch on 
administrative owner '' != 'safImmService'
Mar 31 10:33:19 SC-2-1 osafimmpbed: WA Start prepare for ccb: 
100000078/4294967416 towards slave PBE returned: '20' from Immsv
Mar 31 10:33:19 SC-2-1 osafimmpbed: WA PBE-A failed to prepare PRTA update 
Ccb:100000078/4294967416 towards PBE-B
Mar 31 10:33:19 SC-2-1 osafimmpbed: NO 2PBE Error (20) in PRTA update 
(ccbId:100000078)
Mar 31 10:33:19 SC-2-1 osafimmnd[13967]: WA update of PERSISTENT runtime 
attributes in object 'safSmfCampaign=ERIC-TestAppInstall,safApp=safSmfService' 
REVERTED. PBE rc:20

Mar 31 10:33:22 SC-2-2 osafimmpbed: IN PBE slave waiting for prepare from 
primary on PRTA update ccb:100000078
Mar 31 10:33:22 SC-2-2 osafimmnd[5243]: WA update of PERSISTENT runtime 
attributes in object 'safSmfCampaign=ERIC-TestAppInstall,safApp=safSmfService' 
REVERTED. PBE rc:20
Mar 31 10:33:24 SC-2-2 osafimmpbed: IN PBE slave waiting for prepare from 
primary on PRTA update ccb:100000078
Mar 31 10:33:24 SC-2-2 osafimmpbed: NO Slave PBE time-out in waiting on 
porepare for PRTA update ccb:100000078 
dn:safSmfCampaign=ERIC-TestAppInstall,safApp=safSmfService
Mar 31 10:33:24 SC-2-2 osafimmpbed: ER SQL statement ('ROLLBACK') failed 
because: cannot rollback - no transaction is active
Mar 31 10:33:24 SC-2-2 osafimmpbed: ER Exiting (line:2827)

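The problem is the time gap between the creation of the RTO representing the
slave PBE and the setting of admin-owner by the slave PBE for that RTO.
Admin owner must be set for an admin-operation on the object to succeed.
If the prepare admin-operation from the primary PBE arrives inside that gap,
immsv rejects it with ERR_BAD_OPERATION (the "Mismatch on administrative
owner" message in the log above).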

The fix is to make the primary PBE tolerant of receiving ERR_BAD_OPERATION on
the prepare request, treating it the same way it treats ERR_NOT_EXIST (the
slave PBE RTO does not exist) or ERR_TRY_AGAIN (the slave PBE is still busy
with some other transaction). The pbeAbortTrans function is also fixed so that
it does nothing when no transaction is active (sqliteTransLock == 0), instead
of issuing a ROLLBACK that fails and forces the PBE to exit.

diff --git a/osaf/libs/common/immsv/immpbe_dump.cc 
b/osaf/libs/common/immsv/immpbe_dump.cc
--- a/osaf/libs/common/immsv/immpbe_dump.cc
+++ b/osaf/libs/common/immsv/immpbe_dump.cc
@@ -2818,6 +2818,11 @@ void pbeAbortTrans(void* db_handle)
        char *execErr=NULL;
        int rc=0;
 
+       if(sqliteTransLock == 0) {
+               LOG_WA("pbeAbortTrans was called when sqliteTransLock==0 -- 
ignoring abort");
+               return;
+       }
+
        rc = sqlite3_exec(dbHandle, "ROLLBACK", NULL, NULL, &execErr);
        if(rc != SQLITE_OK) {
                LOG_ER("SQL statement ('ROLLBACK') failed because:\n %s",
@@ -2828,10 +2833,6 @@ void pbeAbortTrans(void* db_handle)
                exit(1);
        }
 
-       if(sqliteTransLock == 0) {
-               LOG_WA("pbeAbortTrans was called when sqliteTransLock==0");
-       }
-
         switch(sqliteTransLock) {
             case 3:
                 --sqliteTransLock;
diff --git a/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc 
b/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc
--- a/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc
+++ b/osaf/services/saf/immsv/immpbed/immpbe_daemon.cc
@@ -242,14 +242,18 @@ static bool pbe2_start_prepare_ccb_A_to_
                rc2B = saImmOmAdminOperationInvoke_2(sOwnerHandle, 
&slavePbeRtObjName, 0, OPENSAF_IMM_PBE_CCB_PREPARE,
                        params, &slavePbeRtReply, SA_TIME_ONE_SECOND * 10);
 
-               if(rc2B == SA_AIS_ERR_TRY_AGAIN || (rc2B==SA_AIS_OK && 
slavePbeRtReply==SA_AIS_ERR_TRY_AGAIN)) {
+               if(rc2B == SA_AIS_ERR_TRY_AGAIN || rc2B == 
SA_AIS_ERR_BAD_OPERATION ||
+                       (rc2B==SA_AIS_OK && 
slavePbeRtReply==SA_AIS_ERR_TRY_AGAIN)) {
                        usleep(sleep_delay_ms * 1000);
                        msecs_waited += sleep_delay_ms; 
                        LOG_NO("Slave PBE %u or Immsv (%u) replied with 
TRY_AGAIN on prepare for ccb:%llx/%llu", 
                                rc2B, slavePbeRtReply, ccbId, ccbId);
                }
-               /* Adjust the waiting time,a bove & below  to be more 
appropriate .... */
-       } while (((rc2B == SA_AIS_ERR_TRY_AGAIN) || (slavePbeRtReply == 
SA_AIS_ERR_TRY_AGAIN)) && (msecs_waited < 3000));
+               /* Adjust the waiting time, above & below  to be more 
appropriate .... 
+                  SA_AIS_ERR_BAD_OPERATION from immsv can happen at slave PBE 
startup when
+                  slave has created its RTO, but not yet set admin-owner for 
it. */
+       } while (((rc2B == SA_AIS_ERR_TRY_AGAIN) || (rc2B == 
SA_AIS_ERR_BAD_OPERATION) ||
+                       (rc2B==SA_AIS_OK && slavePbeRtReply == 
SA_AIS_ERR_TRY_AGAIN)) && (msecs_waited < 3000));
 
        if(rc2B != SA_AIS_OK) {
                if((rc2B == SA_AIS_ERR_NOT_EXIST) && (sNoStdFlags & 
OPENSAF_IMM_FLAG_2PBE1_ALLOW)) {
@@ -294,7 +298,7 @@ static SaAisErrorT pbe2_ok_to_prepare_cc
                           The runtime thread thus reads from the immutils 
structure only from a stable pointer
                           verified to point at a ccb record with corrrect 
ccb-id. 
                        */
-                       LOG_WA("Missmatch on record for ccbId:%llx/%llu - 
thread interference problems ?", ccbId, ccbId);
+                       LOG_WA("Mismatch on record for ccbId:%llx/%llu - thread 
interference problems ?", ccbId, ccbId);
                        s2PbeBCcbUtilCcbData = NULL; 
                        rc = SA_AIS_ERR_TRY_AGAIN;
                        goto done;

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to