Mathi,

Ack. Not tested.
Please see my comments inline [Ramesh].

Thanks,
Ramesh.

On 7/22/2013 3:49 PM, mathi.naic...@oracle.com wrote:
>   osaf/services/infrastructure/fm/fms/fm_amf.c  |   2 +-
>   osaf/services/infrastructure/fm/fms/fm_cb.h   |   3 +
>   osaf/services/infrastructure/fm/fms/fm_evt.h  |   6 +
>   osaf/services/infrastructure/fm/fms/fm_main.c |  88 
> ++++++++++++++++++++------
>   4 files changed, 78 insertions(+), 21 deletions(-)
>
>
> diff --git a/osaf/services/infrastructure/fm/fms/fm_amf.c 
> b/osaf/services/infrastructure/fm/fms/fm_amf.c
> --- a/osaf/services/infrastructure/fm/fms/fm_amf.c
> +++ b/osaf/services/infrastructure/fm/fms/fm_amf.c
> @@ -117,7 +117,7 @@ void fm_saf_CSI_set_callback(SaInvocatio
>       syslog(LOG_INFO, "fm_saf_CSI_set_callback: Comp %s, state %s", 
> compName->value, ha_role_string[haState - 1]);
>       fm_amf_cb = fm_amf_take_hdl();
>       if (fm_amf_cb != NULL) {
> -             fm_cb->role = haState;
> +             fm_cb->amf_state = haState;
>               error = saAmfResponse(fm_amf_cb->amf_hdl, invocation, error);
>               fm_cb->csi_assigned = true;
>       }

[Ramesh]: Since we are now differentiating the RDA and AMF roles through 
the separate variables "role" and "amf_state", I think the variable 
"csi_assigned" can be removed and "amf_state" used directly instead.

> diff --git a/osaf/services/infrastructure/fm/fms/fm_cb.h 
> b/osaf/services/infrastructure/fm/fms/fm_cb.h
> --- a/osaf/services/infrastructure/fm/fms/fm_cb.h
> +++ b/osaf/services/infrastructure/fm/fms/fm_cb.h
> @@ -57,6 +57,9 @@ typedef struct fm_cb {
>   /* Holds own role. */
>       PCS_RDA_ROLE role;
>   
> +/* AMF HA state for FM */
> +     SaAmfHAStateT amf_state;
> +     
>   /* MDS handles. */
>       MDS_DEST adest;
>       MDS_HDL adest_hdl;
> diff --git a/osaf/services/infrastructure/fm/fms/fm_evt.h 
> b/osaf/services/infrastructure/fm/fms/fm_evt.h
> --- a/osaf/services/infrastructure/fm/fms/fm_evt.h
> +++ b/osaf/services/infrastructure/fm/fms/fm_evt.h
> @@ -38,11 +38,16 @@ typedef struct gfm_gfm_msg {
>   
>   } GFM_GFM_MSG;
>   
> +typedef struct fm_rda_info_t {
> +     PCS_RDA_ROLE role;
> +} FM_RDA_INFO;
> +
>   /* FM generated events.*/
>   typedef enum {
>       FM_EVT_TMR_EXP,
>       FM_EVT_NODE_DOWN,
>       FM_EVT_PEER_UP,
> +     FM_EVT_RDA_ROLE,
>       FM_FSM_EVT_MAX
>   } FM_FSM_EVT_CODE;
>   
> @@ -56,6 +61,7 @@ typedef struct fm_evt {
>       union {
>               FM_TMR *fm_tmr;
>               GFM_GFM_MSG gfm_msg;
> +             FM_RDA_INFO rda_info;
>       } info;
>   } FM_EVT;
>   
> diff --git a/osaf/services/infrastructure/fm/fms/fm_main.c 
> b/osaf/services/infrastructure/fm/fms/fm_main.c
> --- a/osaf/services/infrastructure/fm/fms/fm_main.c
> +++ b/osaf/services/infrastructure/fm/fms/fm_main.c
> @@ -74,6 +74,39 @@ static void sigusr1_handler(int sig)
>       ncs_sel_obj_ind(usr1_sel_obj);
>   }
>   
> +/**
> + * Callback from RDA. Post a message/event to the FM mailbox.
> + * @param cb_hdl
> + * @param cb_info
> + * @param error_code
> + */
> +static void rda_cb(uint32_t cb_hdl, PCS_RDA_CB_INFO *cb_info, 
> PCSRDA_RETURN_CODE error_code)
> +{
> +     uint32_t rc;
> +     FM_EVT *evt = NULL;
> +
> +     TRACE_ENTER();
> +
> +     evt = calloc(1, sizeof(FM_EVT));
> +     if (NULL == evt) {
> +             LOG_ER("calloc failed");
> +             goto done;
> +     }
> +
> +     evt->evt_code = FM_EVT_RDA_ROLE;
> +     evt->info.rda_info.role = cb_info->info.io_role;
> +
> +     rc = ncs_ipc_send(&fm_cb->mbx, (NCS_IPC_MSG *)evt, 
> MDS_SEND_PRIORITY_HIGH);
> +     if (rc != NCSCC_RC_SUCCESS) {
> +             syslog(LOG_ERR, "IPC send failed %d", rc);
> +             free(evt);      
> +     }
> +
> + done:
> +     TRACE_LEAVE();
> +}
> +
> +
>   
> /*****************************************************************************
>   
>   PROCEDURE NAME:       main
> @@ -144,6 +177,11 @@ int main(int argc, char *argv[])
>               goto fm_init_failed;
>       }
>   
> +     if ((rc = rda_register_callback(0, rda_cb)) != NCSCC_RC_SUCCESS) {
> +             syslog(LOG_ERR, "rda_register_callback FAILED %u", rc);
> +             goto done;
> +     }
> +
[Ramesh]: This should be "goto fm_init_failed;", consistent with the other failure paths here.
>       if ((rc = ncs_sel_obj_create(&usr1_sel_obj)) != NCSCC_RC_SUCCESS) {
>               LOG_ER("ncs_sel_obj_create FAILED");
>               goto fm_init_failed;
> @@ -326,27 +364,33 @@ static void fm_mbx_msg_handler(FM_CB *fm
>       switch (fm_mbx_evt->evt_code) {
>       case FM_EVT_NODE_DOWN:
>               LOG_NO("Role: %s, Node Down for node id: %x", 
> role_string[fm_cb->role], fm_mbx_evt->node_id);
> -             if ((fm_cb->role == PCS_RDA_STANDBY)||(fm_cb->role == 
> PCS_RDA_QUIESCED)) {
> -                     if ((fm_mbx_evt->node_id == fm_cb->peer_node_id)) {
> -                             /* Start Promote active timer */
> -                             if ((fm_cb->role != PCS_RDA_QUIESCED) && 
> (fm_cb->active_promote_tmr_val != 0)){
> -                                     
> fm_tmr_start(&fm_cb->promote_active_tmr, fm_cb->active_promote_tmr_val);
> -                                     LOG_NO("Promote active timer started");
> -                             } else {
> -                                     /* Check whether node(AMF) 
> initialization is done */
> -                                     if (fm_cb->csi_assigned == false) {
> -                                             opensaf_reboot(0, NULL,
> -                                             "Failover occurred, but this 
> node is not yet ready");
> -                                     }
> -                                     fm_cb->role = PCS_RDA_ACTIVE;
> -                                     opensaf_reboot(fm_cb->peer_node_id, 
> (char *)fm_cb->peer_node_name.value,
> -                                             "Received Node Down for Active 
> peer");
> +             if ((fm_mbx_evt->node_id == fm_cb->peer_node_id)) {
> +                     /* Check whether node(AMF) initialization is done */
> +                     if (fm_cb->csi_assigned == false) {
> +                             opensaf_reboot(0, NULL,
> +                                     "Failover occurred, but this node is 
> not yet ready");
> +                     }
> +                     /* Start Promote active timer */
> +                     if ((fm_cb->role == PCS_RDA_STANDBY) && 
> (fm_cb->active_promote_tmr_val != 0)){
> +                             fm_tmr_start(&fm_cb->promote_active_tmr, 
> fm_cb->active_promote_tmr_val);
> +                             LOG_NO("Promote active timer started");
> +                     } else {
> +                             TRACE("rda role: %s, amf_state: %u", 
> role_string[fm_cb->role], fm_cb->amf_state);
> +                     /* The local node is either Quiesced or Active. Reboot 
> the peer node.
[Ramesh]: The local node can also be a Standby node if 
"active_promote_tmr_val" is 0. In that case, if the link between Active 
and Standby breaks, both nodes will try to initiate a peer-reboot at 
the same time. This is a protocol gap in `fm`: there is no mechanism to 
give preference to a particular controller to initiate the peer-reboot.
> +                      * Note: If local node is Active, there are two 
> interpretations.
> +                      *      - Normal scenario where the Standby went down
> +                      *      - Standby went down in the middle of a 
> switchover and AMF has
> +                      *        transitioned CSI state, but not the RDA state.
> +                      *       In both the cases, this node should be set to 
> ACTIVE.
> +                      */
> +                             if (fm_cb->role != fm_cb->amf_state )
> +                                     LOG_NO("Failover occurred in the middle 
> of switchover");
> +                             fm_cb->role = PCS_RDA_ACTIVE;
> +                             opensaf_reboot(fm_cb->peer_node_id, (char 
> *)fm_cb->peer_node_name.value,
> +                                     "Received Node Down for peer 
> controller");
> +                             if (!((fm_cb->role == PCS_RDA_ACTIVE) && 
> (fm_cb->amf_state == PCS_RDA_ACTIVE)))
>                                       fm_rda_set_role(fm_cb, PCS_RDA_ACTIVE);
[Ramesh]: The condition "if (!(fm_cb->amf_state == PCS_RDA_ACTIVE))" is 
sufficient, because 'role' has already been set to "PCS_RDA_ACTIVE" just above.
> -                             }
>                       }
> -             } else if (fm_cb->role == PCS_RDA_ACTIVE) {
> -                             opensaf_reboot(fm_cb->peer_node_id, (char 
> *)fm_cb->peer_node_name.value,
> -                                     "Received Node Down for standby peer");
>               }
>               break;
>       case FM_EVT_PEER_UP:
> @@ -371,7 +415,11 @@ static void fm_mbx_msg_handler(FM_CB *fm
>                       fm_rda_set_role(fm_cb, PCS_RDA_ACTIVE);
>               }
>               break;
> -
> +     case FM_EVT_RDA_ROLE:
> +             /* RDA role assignment for this controller node */
> +             fm_cb->role = fm_mbx_evt->info.rda_info.role;
> +             syslog(LOG_INFO, "RDA role for this controller node: %s", 
> role_string[fm_cb->role]);
> +             break;
>       default:
>               break;
>       }


------------------------------------------------------------------------------
See everything from the browser to the database with AppDynamics
Get end-to-end visibility with application monitoring from AppDynamics
Isolate bottlenecks and diagnose root cause in seconds.
Start your free trial of AppDynamics Pro today!
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to