osaf/services/saf/cpsv/cpd/cpd_imm.c  |  20 ++++++++++++--------
 osaf/services/saf/cpsv/cpd/cpd_proc.c |   9 ++++++++-
 2 files changed, 20 insertions(+), 9 deletions(-)


Bug :
When cpd processes a cpnd-down event for a COLLOCATED ckpt, and that
checkpoint exists only on the cpnd that went down (no other node in the
cluster has opened this checkpoint),
cpd removes that checkpoint and its replica completely.

In this case the current logic has a bug: it first removes the ckpt node and
then the replica.
This deletes the parent object safCkpt=...,* first, and the child
object safReplica=...,safCkpt=...,* next.

As we know, IMM removes the child objects when the parent is removed, so the
subsequent out-of-sequence removal of the safReplica object fails
and ERR_NOT_EXIST is returned.

Fix :
When cpd removes that checkpoint and its replica completely,
it now follows this sequence: the child object safReplica=...,safCkpt=...,* first,
then the parent object safCkpt=...,* next.

This is a focused fix; we may need to review the complete code for similar
occurrences. Any that are found
will be addressed in a new ticket.

diff --git a/osaf/services/saf/cpsv/cpd/cpd_imm.c 
b/osaf/services/saf/cpsv/cpd/cpd_imm.c
--- a/osaf/services/saf/cpsv/cpd/cpd_imm.c
+++ b/osaf/services/saf/cpsv/cpd/cpd_imm.c
@@ -400,7 +400,9 @@ SaAisErrorT delete_runtime_replica_objec
        osaf_extended_name_lend(replica_dn, &replica_name);
        rc = immutil_saImmOiRtObjectDelete(immOiHandle, &replica_name); 
        if (rc != SA_AIS_OK) {
-               LOG_ER("Deleting run time object %s Failed - rc = 
%d",replica_dn, rc);
+               LOG_ER("Deleting run time object %s Failed-1 - rc = 
%d",replica_dn, rc);
+       } else {
+               TRACE("Deleting run time object %s Success-1 - rc = 
%d",replica_dn, rc);
        }
 
        free(replica_dn);
@@ -522,9 +524,11 @@ SaAisErrorT delete_runtime_ckpt_object(C
        osaf_extended_name_lend(ckpt_node->ckpt_name, &ckpt_name);
 
        rc =  immutil_saImmOiRtObjectDelete(immOiHandle, &ckpt_name);
-       if (rc != SA_AIS_OK)
+       if (rc != SA_AIS_OK) {
                LOG_ER("Deleting run time object %s failed - rc = %d", 
ckpt_node->ckpt_name, rc);
-
+       } else {
+               TRACE("Deleting run time object %s success - rc = %d", 
ckpt_node->ckpt_name, rc);
+       }
        return rc;
 }
 
@@ -917,11 +921,11 @@ SaAisErrorT cpd_clean_checkpoint_objects
                /* Delete the runtime object and its children. */
                rc = immutil_saImmOiRtObjectDelete(cb->immOiHandle, 
&object_name);
                if (rc == SA_AIS_OK) {
-                       TRACE("Object \"%s\" deleted", (char *) 
osaf_extended_name_borrow(&object_name));
-               } else {
-                       LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED %d",
-                                       __FUNCTION__, (char *) 
osaf_extended_name_borrow(&object_name), rc);
-               }
+                      TRACE("saImmOiRtObjectDelete \"%s\" deleted 
Successfully", (char *) osaf_extended_name_borrow(&object_name));
+              } else {
+                      LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED %d",
+                                      __FUNCTION__, (char *) 
osaf_extended_name_borrow(&object_name), rc);
+              }
        }
 
        if (rc != SA_AIS_ERR_NOT_EXIST) {
diff --git a/osaf/services/saf/cpsv/cpd/cpd_proc.c 
b/osaf/services/saf/cpsv/cpd/cpd_proc.c
--- a/osaf/services/saf/cpsv/cpd/cpd_proc.c
+++ b/osaf/services/saf/cpsv/cpd/cpd_proc.c
@@ -809,6 +809,11 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
                        send_evt.info.cpnd.info.ckpt_del.mds_dest = *cpnd_dest;
                        if (ckpt_node->dest_cnt == 0) {
                                TRACE_1("cpd ckpt del success for 
ckpt_id:%llx",ckpt_node->ckpt_id);
+                               /* Delete reploc fist*/
+                               cpd_ckpt_reploc_get(&cb->ckpt_reploc_tree, 
&key_info, &rep_info);
+                               if (rep_info) {
+                                       cpd_ckpt_reploc_node_delete(cb, 
rep_info, ckpt_node->is_unlink_set);
+                               }
                                cpd_ckpt_map_node_get(&cb->ckpt_map_tree, 
ckpt_node->ckpt_name, &map_info);
 
                                /* Remove the ckpt_node */
@@ -875,7 +880,7 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
                /* Send it to CPD(s), by sending ckpt_id = 0 */
                /* This is to delete the node from reploc_tree */
                cpd_ckpt_reploc_get(&cb->ckpt_reploc_tree, &key_info, 
&rep_info);
-               if (rep_info) {
+               if ((rep_info) && (ckpt_node)) {
                        cpd_ckpt_reploc_node_delete(cb, rep_info, 
ckpt_node->is_unlink_set);
                }
 
@@ -1238,6 +1243,8 @@ uint32_t cpd_ckpt_reploc_imm_object_dele
                        LOG_ER("Deleting run time object %s FAILED", 
replica_dn);
                        free(replica_dn);
                        return NCSCC_RC_FAILURE;
+               } else {
+                       TRACE("Deleting run time object %s SUCCESS", 
replica_dn);       
                }
                free(replica_dn);
        }

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to