Hi Praveen,

I have rerun the test with the patch, the test is "lock si" with 
simulated pbe hanging
I got this error

Nov 21 23:52:38.732148 osafamfd [488:imm.cc:0417] >> execute
Nov 21 23:52:38.732159 osafamfd [488:imm.cc:0219] >> exec: Update 
'safSi=AmfDemoTwon,safApp=AmfDemoTwon' saAmfSIAdminState
Nov 21 23:52:38.732171 osafamfd [488:imm.cc:0658] >> 
object_name_to_class_type: safSi=AmfDemoTwon,safApp=AmfDemoTwon
Nov 21 23:52:38.732186 osafamfd [488:imm.cc:0704] << 
object_name_to_class_type: 24
Nov 21 23:52:38.732198 osafamfd [488:imma_oi_api.c:2446] >> 
rt_object_update_common
Nov 21 23:52:38.732249 osafamfd [488:imma_oi_api.c:2519] ER 
ERR_BAD_OPERATION: The SaImmOiHandleT is not associated with any 
implementer name
Nov 21 23:52:38.732259 osafamfd [488:imma_oi_api.c:2719] << 
rt_object_update_common
...
Nov 21 23:52:38.732692 osafamfd [488:imm.cc:0250] ER exec: update FAILED 20
Nov 21 23:52:38.733654 osafamfd [488:imm.cc:0254] << exec
Nov 21 23:52:38.733668 osafamfd [488:imm.cc:0421] << execute: 5

Sometimes I got error code "2" instead

Nov 22  0:15:59.462401 osafamfd [486:imm.cc:0417] >> execute
Nov 22  0:15:59.462405 osafamfd [486:imm.cc:0219] >> exec: Update 
'safSi=AmfDemoTwon,safApp=AmfDemoTwon' saAmfSIAdminState
Nov 22  0:15:59.462410 osafamfd [486:imm.cc:0658] >> 
object_name_to_class_type: safSi=AmfDemoTwon,safApp=AmfDemoTwon
Nov 22  0:15:59.462490 osafamfd [486:imm.cc:0704] << 
object_name_to_class_type: 24
Nov 22  0:15:59.462637 osafamfd [486:imma_oi_api.c:2446] >> 
rt_object_update_common
Nov 22  0:15:59.462649 osafamfd [486:imma_oi_api.c:2611] T4 ERR_LIBRARY: 
Overlapping use of IMM OI handle by multiple threads
Nov 22  0:15:59.462654 osafamfd [486:imma_oi_api.c:2719] << 
rt_object_update_common
Nov 22  0:15:59.462707 osafamfd [486:imm.cc:0250] ER exec: update FAILED 2
Nov 22  0:15:59.462714 osafamfd [486:imm.cc:0254] << exec
Nov 22  0:15:59.462718 osafamfd [486:imm.cc:0421] << execute: 5

The "exec: update FAILED" is supposed to remove the update job of 
saAmfSIAdminState out of the queue, I think

Thanks,
Minh

On 21/11/16 20:51, [email protected] wrote:
>   osaf/services/saf/amf/amfd/imm.cc       |  16 +++++++++++-----
>   osaf/services/saf/amf/amfd/include/cb.h |   6 ++++++
>   osaf/services/saf/amf/amfd/main.cc      |   2 ++
>   3 files changed, 19 insertions(+), 5 deletions(-)
>
>
> When AMFD receives BAD_HANDLE from IMM, it calls avd_imm_reinit_bg()
> to init with IMM in a separate thread. After this if again main thread
> tries to execute some job for IMM update, IMM will return BAD_HANDLE and AMFD
> will again call avd_imm_reinit_bg() to init with IMM. This can happen when 
> AMFD tries
> to update IMM in job queues.
>
> Patch fixes this problem.
> Also if avd_saImmOiRtObjectUpdate_sync() fails with BAD_HANDLE,
> update will be pushed in jon queue to be updated later.
>
> diff --git a/osaf/services/saf/amf/amfd/imm.cc 
> b/osaf/services/saf/amf/amfd/imm.cc
> --- a/osaf/services/saf/amf/amfd/imm.cc
> +++ b/osaf/services/saf/amf/amfd/imm.cc
> @@ -1368,6 +1368,7 @@ SaAisErrorT avd_imm_init(void *avd_cb)
>       immutilWrapperProfile.nTries = 180;
>   
>   
> +     cb->avd_imm_status = AVD_IMM_INIT_ONGOING;
>       if ((error = immutil_saImmOiInitialize_2(&cb->immOiHandle, 
> &avd_callbacks, &immVersion)) != SA_AIS_OK) {
>               LOG_ER("saImmOiInitialize failed %u", error);
>               goto done;
> @@ -1383,6 +1384,7 @@ SaAisErrorT avd_imm_init(void *avd_cb)
>               goto done;
>       }
>   
> +     cb->avd_imm_status = AVD_IMM_INIT_DONE;
>       TRACE("Successfully initialized IMM");
>   
>   done:
> @@ -1639,6 +1641,7 @@ SaAisErrorT avd_saImmOiRtObjectUpdate_sy
>       const SaImmAttrModificationT_2 *attrMods[] = {&attrMod, nullptr};
>       SaImmAttrValueT attrValues[] = {value};
>   
> +     const std::string attribute_name(attributeName);
>       TRACE_ENTER2("'%s' %s", dn.c_str(), attributeName);
>   
>       attrMod.modType = modifyType;
> @@ -1651,6 +1654,9 @@ SaAisErrorT avd_saImmOiRtObjectUpdate_sy
>       if (rc != SA_AIS_OK) {
>               LOG_WA("saImmOiRtObjectUpdate of '%s' %s failed with %u",
>                       dn.c_str(), attributeName, rc);
> +             //Now it will be updated through job queue.
> +             avd_saImmOiRtObjectUpdate(dn, attribute_name, attrValueType, 
> value);
> +
>       }
>       return rc;
>   }
> @@ -1954,16 +1960,16 @@ void avd_imm_reinit_bg(void)
>       int rc = 0;
>   
>       TRACE_ENTER();
> +     if (avd_cb->avd_imm_status == AVD_IMM_INIT_ONGOING) {
> +             TRACE("Already IMM init is going in another thread");
> +             return;
> +     }
> +     avd_cb->avd_imm_status = AVD_IMM_INIT_ONGOING;
>   
>       LOG_NO("Re-initializing with IMM");
>   
>       osaf_mutex_lock_ordie(&imm_reinit_thread_startup_mutex);
>   
> -     (void) saImmOiFinalize(avd_cb->immOiHandle);
> -
> -     avd_cb->immOiHandle = 0;
> -     avd_cb->is_implementer = false;
> -
>       pthread_attr_init(&attr);
>       pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
>   
> diff --git a/osaf/services/saf/amf/amfd/include/cb.h 
> b/osaf/services/saf/amf/amfd/include/cb.h
> --- a/osaf/services/saf/amf/amfd/include/cb.h
> +++ b/osaf/services/saf/amf/amfd/include/cb.h
> @@ -59,6 +59,11 @@ typedef enum {
>       AVD_APP_STATE  /* Cluster startup timer has expired */
>   } AVD_INIT_STATE;
>   
> +typedef enum {
> +     AVD_IMM_INIT_BASE = 1,
> +     AVD_IMM_INIT_ONGOING = 2,
> +     AVD_IMM_INIT_DONE = 3,
> +} AVD_IMM_INIT_STATUS;
>   /*
>    * Sync state of the Standby.
>    */
> @@ -237,6 +242,7 @@ typedef struct cl_cb_tag {
>   
>       /* The duration that amfd should tolerate the absence of SCs */
>       uint32_t scs_absence_max_duration;
> +     AVD_IMM_INIT_STATUS avd_imm_status;
>   } AVD_CL_CB;
>   
>   extern AVD_CL_CB *avd_cb;
> diff --git a/osaf/services/saf/amf/amfd/main.cc 
> b/osaf/services/saf/amf/amfd/main.cc
> --- a/osaf/services/saf/amf/amfd/main.cc
> +++ b/osaf/services/saf/amf/amfd/main.cc
> @@ -562,6 +562,7 @@ static uint32_t initialize(void)
>       cb->all_nodes_synced = false;
>       cb->node_sync_window_closed = false;
>       cb->scs_absence_max_duration = 0;
> +     cb->avd_imm_status = AVD_IMM_INIT_BASE;
>   
>       if ((val = getenv("AVSV_HB_PERIOD")) != nullptr) {
>               cb->heartbeat_tmr_period = strtoll(val, nullptr, 0);
> @@ -687,6 +688,7 @@ static void main_loop(void)
>                       }
>   
>                       if (evt->rcv_evt == AVD_IMM_REINITIALIZED) {
> +                             cb->avd_imm_status = AVD_IMM_INIT_DONE;
>                               TRACE("Received IMM reinit msg");
>                               polltmo = retval_to_polltmo(Fifo::execute(cb));
>                               continue;
>


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to