Dear Mahesh,

Reviewed and tested with collocated and non-collocated case, saw problem
fixed and could not find any occurrence.

So ACK from me, tested.

Sincerely,
Hoang

-----Original Message-----
From: mahesh.va...@oracle.com [mailto:mahesh.va...@oracle.com] 
Sent: Wednesday, November 16, 2016 3:58 PM
To: hoang.m...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: [PATCH 1 of 1] cpsv: on cpnd down fist remove child safReplica
object then parent safCkpt object [#2189]

 osaf/services/saf/cpsv/cpd/cpd_imm.c  |  20 ++++++++++++--------
 osaf/services/saf/cpsv/cpd/cpd_proc.c |   9 ++++++++-
 2 files changed, 20 insertions(+), 9 deletions(-)


Bug :
While cpd processing cpnd down for  COLLOCATED  cktp  and that checkpoint
only exist on the went down cpnd ( no others Node opened this checkpoint in
cluster) , then cpd  removes  that checkpoint and replica completely.

In such case the current logic has as bug,  fist it removes ckpt node and
then replica, this is causing deletion of parent object safCkpt=...,*  first
, then child object safReplica=...,safCkpt=...,* next.

as we know IMM removes child if parent is removed ,so this is causing the
issue out of sequence remove of safReplica object and ERR_NOT_EXIST  is
returned.

Fix :
While cpd removing  that checkpoint and replica completely , follow the
sequence of  child object safReplica=...,safCkpt=...,*  fist then  parent
object safCkpt=...,* next.

This is focused fix , my be we need to review complete code for such
occurrences , if found will be addressed in new ticket.

diff --git a/osaf/services/saf/cpsv/cpd/cpd_imm.c
b/osaf/services/saf/cpsv/cpd/cpd_imm.c
--- a/osaf/services/saf/cpsv/cpd/cpd_imm.c
+++ b/osaf/services/saf/cpsv/cpd/cpd_imm.c
@@ -400,7 +400,9 @@ SaAisErrorT delete_runtime_replica_objec
        osaf_extended_name_lend(replica_dn, &replica_name);
        rc = immutil_saImmOiRtObjectDelete(immOiHandle, &replica_name); 
        if (rc != SA_AIS_OK) {
-               LOG_ER("Deleting run time object %s Failed - rc =
%d",replica_dn, rc);
+               LOG_ER("Deleting run time object %s Failed-1 - rc =
%d",replica_dn, rc);
+       } else {
+               TRACE("Deleting run time object %s Success-1 - rc =
%d",replica_dn, 
+rc);
        }
 
        free(replica_dn);
@@ -522,9 +524,11 @@ SaAisErrorT delete_runtime_ckpt_object(C
        osaf_extended_name_lend(ckpt_node->ckpt_name, &ckpt_name);
 
        rc =  immutil_saImmOiRtObjectDelete(immOiHandle, &ckpt_name);
-       if (rc != SA_AIS_OK)
+       if (rc != SA_AIS_OK) {
                LOG_ER("Deleting run time object %s failed - rc = %d",
ckpt_node->ckpt_name, rc);
-
+       } else {
+               TRACE("Deleting run time object %s success - rc = %d",
ckpt_node->ckpt_name, rc);
+       }
        return rc;
 }
 
@@ -917,11 +921,11 @@ SaAisErrorT cpd_clean_checkpoint_objects
                /* Delete the runtime object and its children. */
                rc = immutil_saImmOiRtObjectDelete(cb->immOiHandle,
&object_name);
                if (rc == SA_AIS_OK) {
-                       TRACE("Object \"%s\" deleted", (char *)
osaf_extended_name_borrow(&object_name));
-               } else {
-                       LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED
%d",
-                                       __FUNCTION__, (char *)
osaf_extended_name_borrow(&object_name), rc);
-               }
+                      TRACE("saImmOiRtObjectDelete \"%s\" deleted
Successfully", (char *) osaf_extended_name_borrow(&object_name));
+              } else {
+                      LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED
%d",
+                                      __FUNCTION__, (char *)
osaf_extended_name_borrow(&object_name), rc);
+              }
        }
 
        if (rc != SA_AIS_ERR_NOT_EXIST) { diff --git
a/osaf/services/saf/cpsv/cpd/cpd_proc.c
b/osaf/services/saf/cpsv/cpd/cpd_proc.c
--- a/osaf/services/saf/cpsv/cpd/cpd_proc.c
+++ b/osaf/services/saf/cpsv/cpd/cpd_proc.c
@@ -809,6 +809,11 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
                        send_evt.info.cpnd.info.ckpt_del.mds_dest =
*cpnd_dest;
                        if (ckpt_node->dest_cnt == 0) {
                                TRACE_1("cpd ckpt del success for
ckpt_id:%llx",ckpt_node->ckpt_id);
+                               /* Delete reploc fist*/
+                               cpd_ckpt_reploc_get(&cb->ckpt_reploc_tree,
&key_info, &rep_info);
+                               if (rep_info) {
+                                       cpd_ckpt_reploc_node_delete(cb,
rep_info, ckpt_node->is_unlink_set);
+                               }
                                cpd_ckpt_map_node_get(&cb->ckpt_map_tree,
ckpt_node->ckpt_name, &map_info);
 
                                /* Remove the ckpt_node */
@@ -875,7 +880,7 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
                /* Send it to CPD(s), by sending ckpt_id = 0 */
                /* This is to delete the node from reploc_tree */
                cpd_ckpt_reploc_get(&cb->ckpt_reploc_tree, &key_info,
&rep_info);
-               if (rep_info) {
+               if ((rep_info) && (ckpt_node)) {
                        cpd_ckpt_reploc_node_delete(cb, rep_info,
ckpt_node->is_unlink_set);
                }
 
@@ -1238,6 +1243,8 @@ uint32_t cpd_ckpt_reploc_imm_object_dele
                        LOG_ER("Deleting run time object %s FAILED",
replica_dn);
                        free(replica_dn);
                        return NCSCC_RC_FAILURE;
+               } else {
+                       TRACE("Deleting run time object %s SUCCESS",
replica_dn);    
                }
                free(replica_dn);
        }


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to