osaf/services/saf/plmsv/plms/hpi_intf/plms_hsm.c | 204 +++++++---------------
osaf/services/saf/plmsv/plms/plms_amf.c | 19 +--
2 files changed, 71 insertions(+), 152 deletions(-)
Blade extraction does not work consistently. If you extract a blade and AMF
should reject the extraction (because it cannot fail over the services), the
blade will sometimes still deactivate.
The standby plm daemon is not calling saHpiHotSwapPolicyCancel and
saHpiAutoExtractTimeoutSet for the resources. When openhpid on the standby
controller gets the pending extraction message from the shelf manager, it
happily allows the extraction to proceed because it doesn't have the extraction
policy set. The openhpid on the active controller has been programmed by the
active plm daemon to cancel the hot swap policy, and set the auto extract
timeout for the resource. This creates a race condition: the outcome depends on
which openhpid the shelf manager responds to first.
Both the active and standby plm daemons need to call saHpiHotSwapPolicyCancel
and saHpiAutoExtractTimeoutSet, to make sure that the openhpid instances on both
the active and standby controllers have the same auto-extract policy.
diff --git a/osaf/services/saf/plmsv/plms/hpi_intf/plms_hsm.c
b/osaf/services/saf/plmsv/plms/hpi_intf/plms_hsm.c
--- a/osaf/services/saf/plmsv/plms/hpi_intf/plms_hsm.c
+++ b/osaf/services/saf/plmsv/plms/hpi_intf/plms_hsm.c
@@ -71,7 +71,6 @@ static SaUint32T hsm_get_idr_chassis_inf
SaHpiIdrIdT idr_id,
PLMS_INV_DATA *inv_data);
static SaUint32T hsm_session_reopen();
-SaUint32T plms_hsm_session_close();
static SaUint32T hsm_discover_and_dispatch();
static void *plms_hsm();
@@ -446,28 +445,10 @@ static void *plms_hsm(void)
SaHpiPowerStateT power_state;
SaUint32T retriev_idr_info = 0;
SaInt32T rc,ret;
- SaInt32T got_new_active = false;
+ SaInt32T active = false;
TRACE_ENTER();
- rc = pthread_mutex_lock(&hsm_ha_state.mutex);
- if(rc){
- LOG_CR("HSM: Failed to take hsm_ha_state lock, exiting
\
- the thread, ret value:%d err:%s", rc, strerror(errno));
- assert(0);
- }
- if(hsm_ha_state.state != SA_AMF_HA_ACTIVE){
- TRACE("HSM: Thread going to block till Active state is set");
- pthread_cond_wait(&hsm_ha_state.cond,&hsm_ha_state.mutex);
- }
-
- rc = pthread_mutex_unlock(&hsm_ha_state.mutex);
- if(rc){
- LOG_CR("HSM:Failed to unlock hsm_ha_state lock,exiting
\
- the thread, ret value:%d err:%s", rc, strerror(errno));
- assert(0);
- }
-
/* Subscribe to receive events on this HPI session */
rc = saHpiSubscribe(cb->session_id);
if( SA_OK != rc ){
@@ -493,68 +474,9 @@ static void *plms_hsm(void)
TRACE("HSM:Blocking to receive events on HPI session");
while(true){
- rc = pthread_mutex_lock(&hsm_ha_state.mutex);
- if(rc){
- LOG_CR("HSM: Failed to take hsm_ha_state lock, exiting \
- the thread, ret value:%d err:%s", rc, strerror(errno));
- assert(0);
- }
- if(hsm_ha_state.state != SA_AMF_HA_ACTIVE){
- /* Wait on condition variable for the HA role from PLMS
main thread */
- TRACE("HSM:Received Standby state,thread going to block
till Active state is set");
-
pthread_cond_wait(&hsm_ha_state.cond,&hsm_ha_state.mutex);
- got_new_active = true;
- }
- rc = pthread_mutex_unlock(&hsm_ha_state.mutex);
- if(rc){
- LOG_CR("HSM:Failed to unlock hsm_ha_state lock,exiting \
- the thread, ret value:%d err:%s", rc, strerror(errno));
- assert(0);
- }
- if(got_new_active){
- /* Open the session on New active*/
- hsm_session_reopen();
-
- /* Rediscover the resources */
- hsm_discover_and_dispatch();
-
- got_new_active = false;
-
- /* PLMC initialize */
- if( !plms_cb->plmc_initialized ) {
- rc =
plmc_initialize(plms_plmc_connect_cbk,plms_plmc_udp_cbk,plms_plmc_error_cbk);
- if (rc) {
- LOG_ER("PLMC initialize failed");
- rc = NCSCC_RC_FAILURE;
- exit(0);
- }
- plms_cb->plmc_initialized = true;
- TRACE("PLMC initialization Success.");
- }
- }
-
ret = saHpiEventGet(cb->session_id, SAHPI_TIMEOUT_BLOCK,
&event, &rdr, &rpt_entry, NULL);
- plms_send_hpi_evt_ntf(event.EventType, &event, &(rpt_entry));
- rc = pthread_mutex_lock(&hsm_ha_state.mutex);
- if(rc){
- LOG_CR("HSM: Failed to take hsm_ha_state lock,exiting
thread, ret value:%d err:%s",rc,strerror(errno));
- assert(0);
- }
- if(hsm_ha_state.state != SA_AMF_HA_ACTIVE){
- rc = pthread_mutex_unlock(&hsm_ha_state.mutex);
- if(rc){
- LOG_CR("HSM:Failed to unlock
hsm_ha_state,exiting thread,ret value:%d err:%s",rc,strerror(errno));
- assert(0);
- }
- continue;
- }
- rc = pthread_mutex_unlock(&hsm_ha_state.mutex);
- if(rc){
- LOG_CR("HSM:Failed to unlock hsm_ha_state,exiting
thread,ret value:%d err:%s",rc,strerror(errno));
- assert(0);
- }
if( SA_OK != ret ){
LOG_ER("HSM:saHpiEventGet failed, ret val is:%d",rc);
/* Reopen the session */
@@ -566,6 +488,21 @@ static void *plms_hsm(void)
TRACE("HSM:Receieved event for res_id:%u Evt type:%u
",rpt_entry.ResourceId,event.EventType);
+ rc = pthread_mutex_lock(&hsm_ha_state.mutex);
+ if(rc){
+ LOG_CR("HSM: Failed to take hsm_ha_state lock,exiting
thread, ret value:%d err:%s",rc,strerror(errno));
+ assert(0);
+ }
+ active = (hsm_ha_state.state == SA_AMF_HA_ACTIVE) ? true :
false;
+ rc = pthread_mutex_unlock(&hsm_ha_state.mutex);
+ if(rc){
+ LOG_CR("HSM:Failed to unlock hsm_ha_state,exiting
thread,ret value:%d err:%s",rc,strerror(errno));
+ assert(0);
+ }
+
+ if (active)
+ plms_send_hpi_evt_ntf(event.EventType, &event,
&(rpt_entry));
+
if (event.EventType == SAHPI_ET_OEM) {
/* not currently supporting OEM events */
continue;
@@ -612,6 +549,42 @@ static void *plms_hsm(void)
}
}
+ if (event.EventType == SAHPI_ET_HOTSWAP){
+ if(hotswap_state_model ==
PLMS_HPI_FULL_FIVE_HOTSWAP_MODEL){
+ if
(event.EventDataUnion.HotSwapEvent.HotSwapState ==
+
SAHPI_HS_STATE_EXTRACTION_PENDING ||
+
event.EventDataUnion.HotSwapEvent.HotSwapState ==
+
SAHPI_HS_STATE_INSERTION_PENDING){
+ /* Cancel the hotswap polcy */
+ rc =
saHpiHotSwapPolicyCancel(cb->session_id,rpt_entry.ResourceId);
+ if (SA_OK != rc)
+ LOG_ER("Error taking control of
res:%d ret val:%d",
+
rpt_entry.ResourceId,rc);
+
+ /* Set the AutoExtractionTimeout */
+ rc =
saHpiAutoExtractTimeoutSet(cb->session_id,rpt_entry.ResourceId,
+
cb->extr_pending_timeout);
+ if (SA_OK != rc)
+ LOG_ER("AutoExtractTimeoutSet
failed for res:%u ret val:%d",
+
rpt_entry.ResourceId,rc);
+
+ }
+ }
+
+ if (active) {
+
hsm_send_hotswap_event(&rpt_entry,hotswap_state_model,event.EventDataUnion.HotSwapEvent.HotSwapState,
+
event.EventDataUnion.HotSwapEvent.PreviousHotSwapState,retriev_idr_info);
+ }
+ }
+
+ /*
+ * saHpiHotSwapPolicyCancel and saHpiAutoExtractTimeoutSet need
to be set on
+ * both active and standby, but anything else is only done by
active
+ */
+ if (!active)
+ continue;
+
+
/* If it is a resource restore event( communication lost and
got restored immediately ) ,retrieve the hotswap state after
communication is restored */
@@ -638,32 +611,6 @@ static void *plms_hsm(void)
retriev_idr_info);
}
}
-
- if (event.EventType == SAHPI_ET_HOTSWAP){
- if(hotswap_state_model ==
PLMS_HPI_FULL_FIVE_HOTSWAP_MODEL){
- if
(event.EventDataUnion.HotSwapEvent.HotSwapState ==
-
SAHPI_HS_STATE_EXTRACTION_PENDING ||
-
event.EventDataUnion.HotSwapEvent.HotSwapState ==
-
SAHPI_HS_STATE_INSERTION_PENDING){
- /* Cancel the hotswap polcy */
- rc =
saHpiHotSwapPolicyCancel(cb->session_id,rpt_entry.ResourceId);
- if (SA_OK != rc)
- LOG_ER("Error taking control of
res:%d ret val:%d",
-
rpt_entry.ResourceId,rc);
-
- /* Set the AutoExtractionTimeout */
- rc =
saHpiAutoExtractTimeoutSet(cb->session_id,rpt_entry.ResourceId,
-
cb->extr_pending_timeout);
- if (SA_OK != rc)
- LOG_ER("AutoExtractTimeoutSet
failed for res:%u ret val:%d",
-
rpt_entry.ResourceId,rc);
-
- }
- }
-
hsm_send_hotswap_event(&rpt_entry,hotswap_state_model,event.EventDataUnion.HotSwapEvent.HotSwapState,
-
event.EventDataUnion.HotSwapEvent.PreviousHotSwapState,retriev_idr_info);
-
- }
}
TRACE_LEAVE();
@@ -698,6 +645,7 @@ static SaUint32T hsm_discover_and_dispat
SaUint32T prev_domain_op_status = NCSCC_RC_SUCCESS;
SaUint32T rc = NCSCC_RC_SUCCESS;
static SaUint32T rpt_retry_count = 0;
+ bool active = false;
TRACE_ENTER();
@@ -742,13 +690,21 @@ static SaUint32T hsm_discover_and_dispat
plmscb->my_entity_path = 0;
#endif
+ rc = pthread_mutex_lock(&hsm_ha_state.mutex);
+ if(rc){
+ LOG_CR("HSM: Failed to take hsm_ha_state lock,exiting thread,
ret value:%d err:%s",rc,strerror(errno));
+ assert(0);
+ }
+ active = (hsm_ha_state.state == SA_AMF_HA_ACTIVE) ? true : false;
+ rc = pthread_mutex_unlock(&hsm_ha_state.mutex);
+ if(rc){
+ LOG_CR("HSM:Failed to unlock hsm_ha_state,exiting thread,ret
value:%d err:%s",rc,strerror(errno));
+ assert(0);
+ }
+
/* Process the list of RPT entries on this session */
next = SAHPI_FIRST_ENTRY;
do{
-
- if(hsm_ha_state.state == SA_AMF_HA_STANDBY)
- return NCSCC_RC_FAILURE;
-
current = next;
/* Get the RPT entry */
rc = saHpiRptEntryGet(cb->session_id, current,&next,
&rpt_entry);
@@ -869,8 +825,11 @@ static SaUint32T hsm_discover_and_dispat
retriev_idr_info = true;
/* Send the outstanding hot_swap event*/
- hsm_send_hotswap_event(&rpt_entry, hotswap_state_model, state,
- previous_state,retriev_idr_info);
+ if (active) {
+ hsm_send_hotswap_event(&rpt_entry, hotswap_state_model,
state,
+
previous_state,retriev_idr_info);
+ }
+
if(SAHPI_LAST_ENTRY == next &&
NCSCC_RC_SUCCESS == prev_domain_op_status ){
/* Get the update count of domain_info*/
@@ -1617,26 +1576,3 @@ static SaUint32T hsm_session_reopen()
TRACE_LEAVE();
return NCSCC_RC_SUCCESS;
}
-/***********************************************************************
-* @brief This function closes HPI session
-*
-* @param[in]
-*
-*
-* @return NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE
-***********************************************************************/
-SaUint32T plms_hsm_session_close()
-{
- PLMS_HSM_CB *cb = hsm_cb;
- SaUint32T rc = 0;
- /* Close the HPI session */
- rc = saHpiSessionClose(cb->session_id);
- if (SA_OK != rc){
- LOG_ER("HSM:Close session return error: %d:\n",rc);
- return NCSCC_RC_FAILURE;
- }
-
- /* Reset the session_id */
- cb->session_id = 0;
- return NCSCC_RC_SUCCESS;
-}
diff --git a/osaf/services/saf/plmsv/plms/plms_amf.c
b/osaf/services/saf/plmsv/plms/plms_amf.c
--- a/osaf/services/saf/plmsv/plms/plms_amf.c
+++ b/osaf/services/saf/plmsv/plms/plms_amf.c
@@ -266,7 +266,7 @@ plms_amf_CSI_set_callback(SaInvocationT
pthread_mutex_unlock(&hrb_ha_state.mutex);
}
/* PLMC initialize */
- if(!cb->hpi_cfg.hpi_support && !cb->plmc_initialized){
+ if(!cb->plmc_initialized){
TRACE("Initializing PLMC");
rc = plmc_initialize(plms_plmc_connect_cbk,
plms_plmc_udp_cbk,
@@ -297,23 +297,6 @@ plms_amf_CSI_set_callback(SaInvocationT
hrb_ha_state.state = SA_AMF_HA_STANDBY;
pthread_mutex_unlock(&hrb_ha_state.mutex);
- SaUint32T (* hsm_func_ptr)() = NULL;
- if(cb->hpi_cfg.hpi_support){
- /* Get the hsm Init func ptr */
- hsm_func_ptr = dlsym(cb->hpi_intf_hdl,
"plms_hsm_session_close");
- if ( NULL == hsm_func_ptr ) {
- LOG_ER("dlsym() failed to get the
hsm_func_ptr,error %s", dlerror());
- goto response;
- }
-
- /* Initialize HSM */
- rc = (* hsm_func_ptr)();
- if ( NCSCC_RC_SUCCESS != rc ) {
- LOG_ER("plms_session_close failed");
- goto response;
- }
- }
-
/* PLMC finalize */
if(cb->plmc_initialized){
rc = plmc_destroy();
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel