Dear Mahesh, I reviewed and tested the patch with both the collocated and non-collocated cases; the problem is fixed and I could not find any further occurrence of it.
So ACK from me, tested. Sincerely, Hoang -----Original Message----- From: mahesh.va...@oracle.com [mailto:mahesh.va...@oracle.com] Sent: Wednesday, November 16, 2016 3:58 PM To: hoang.m...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net Subject: [PATCH 1 of 1] cpsv: on cpnd down fist remove child safReplica object then parent safCkpt object [#2189] osaf/services/saf/cpsv/cpd/cpd_imm.c | 20 ++++++++++++-------- osaf/services/saf/cpsv/cpd/cpd_proc.c | 9 ++++++++- 2 files changed, 20 insertions(+), 9 deletions(-) Bug: While cpd is processing a cpnd down event for a COLLOCATED ckpt, and that checkpoint exists only on the cpnd that went down (no other node in the cluster has opened this checkpoint), cpd removes that checkpoint and its replica completely. In this case the current logic has a bug: it first removes the ckpt node and then the replica, which causes deletion of the parent object safCkpt=...,* first and then the child object safReplica=...,safCkpt=...,* next. As we know, IMM removes the child if the parent is removed, so this out-of-sequence removal of the safReplica object fails and ERR_NOT_EXIST is returned. Fix: While cpd is removing that checkpoint and replica completely, follow the sequence of child object safReplica=...,safCkpt=...,* first, then parent object safCkpt=...,* next. This is a focused fix; maybe we need to review the complete code for such occurrences, and any that are found will be addressed in a new ticket. 
diff --git a/osaf/services/saf/cpsv/cpd/cpd_imm.c b/osaf/services/saf/cpsv/cpd/cpd_imm.c --- a/osaf/services/saf/cpsv/cpd/cpd_imm.c +++ b/osaf/services/saf/cpsv/cpd/cpd_imm.c @@ -400,7 +400,9 @@ SaAisErrorT delete_runtime_replica_objec osaf_extended_name_lend(replica_dn, &replica_name); rc = immutil_saImmOiRtObjectDelete(immOiHandle, &replica_name); if (rc != SA_AIS_OK) { - LOG_ER("Deleting run time object %s Failed - rc = %d",replica_dn, rc); + LOG_ER("Deleting run time object %s Failed-1 - rc = %d",replica_dn, rc); + } else { + TRACE("Deleting run time object %s Success-1 - rc = %d",replica_dn, +rc); } free(replica_dn); @@ -522,9 +524,11 @@ SaAisErrorT delete_runtime_ckpt_object(C osaf_extended_name_lend(ckpt_node->ckpt_name, &ckpt_name); rc = immutil_saImmOiRtObjectDelete(immOiHandle, &ckpt_name); - if (rc != SA_AIS_OK) + if (rc != SA_AIS_OK) { LOG_ER("Deleting run time object %s failed - rc = %d", ckpt_node->ckpt_name, rc); - + } else { + TRACE("Deleting run time object %s success - rc = %d", ckpt_node->ckpt_name, rc); + } return rc; } @@ -917,11 +921,11 @@ SaAisErrorT cpd_clean_checkpoint_objects /* Delete the runtime object and its children. 
*/ rc = immutil_saImmOiRtObjectDelete(cb->immOiHandle, &object_name); if (rc == SA_AIS_OK) { - TRACE("Object \"%s\" deleted", (char *) osaf_extended_name_borrow(&object_name)); - } else { - LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED %d", - __FUNCTION__, (char *) osaf_extended_name_borrow(&object_name), rc); - } + TRACE("saImmOiRtObjectDelete \"%s\" deleted Successfully", (char *) osaf_extended_name_borrow(&object_name)); + } else { + LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED %d", + __FUNCTION__, (char *) osaf_extended_name_borrow(&object_name), rc); + } } if (rc != SA_AIS_ERR_NOT_EXIST) { diff --git a/osaf/services/saf/cpsv/cpd/cpd_proc.c b/osaf/services/saf/cpsv/cpd/cpd_proc.c --- a/osaf/services/saf/cpsv/cpd/cpd_proc.c +++ b/osaf/services/saf/cpsv/cpd/cpd_proc.c @@ -809,6 +809,11 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c send_evt.info.cpnd.info.ckpt_del.mds_dest = *cpnd_dest; if (ckpt_node->dest_cnt == 0) { TRACE_1("cpd ckpt del success for ckpt_id:%llx",ckpt_node->ckpt_id); + /* Delete reploc fist*/ + cpd_ckpt_reploc_get(&cb->ckpt_reploc_tree, &key_info, &rep_info); + if (rep_info) { + cpd_ckpt_reploc_node_delete(cb, rep_info, ckpt_node->is_unlink_set); + } cpd_ckpt_map_node_get(&cb->ckpt_map_tree, ckpt_node->ckpt_name, &map_info); /* Remove the ckpt_node */ @@ -875,7 +880,7 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c /* Send it to CPD(s), by sending ckpt_id = 0 */ /* This is to delete the node from reploc_tree */ cpd_ckpt_reploc_get(&cb->ckpt_reploc_tree, &key_info, &rep_info); - if (rep_info) { + if ((rep_info) && (ckpt_node)) { cpd_ckpt_reploc_node_delete(cb, rep_info, ckpt_node->is_unlink_set); } @@ -1238,6 +1243,8 @@ uint32_t cpd_ckpt_reploc_imm_object_dele LOG_ER("Deleting run time object %s FAILED", replica_dn); free(replica_dn); return NCSCC_RC_FAILURE; + } else { + TRACE("Deleting run time object %s SUCCESS", replica_dn); } free(replica_dn); } ------------------------------------------------------------------------------ 
_______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel