Problem:

Under certain circumstances cpg does not send group leave messages.

To reproduce, use a large token timeout (tested with token == 5min):
1 start all nodes
2 start ./test/testcpg on all nodes
3 go to the node with the lowest nodeid
4 ifconfig <int> down && killall -9 corosync && /etc/init.d/corosync restart && ./testcpg
5 the other nodes will not get the cpg leave event
6 testcpg reports an extra cpg group (basically one was not removed)

Solution:
If a member gets removed using the new trans_list and
that member is the node used for syncing (lowest nodeid),
then the next lowest node needs to be chosen for syncing.

David, would you mind confirming that this solves your problem?

-Angus

Signed-off-by: Angus Salkeld <asalk...@redhat.com>
---
 services/cpg.c |   36 ++++++++++++++++++++++++++++++++++++
 1 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/services/cpg.c b/services/cpg.c
index ede426f..e9926ac 100644
--- a/services/cpg.c
+++ b/services/cpg.c
@@ -414,6 +414,27 @@ struct req_exec_cpg_downlist {
 
 static struct req_exec_cpg_downlist g_req_exec_cpg_downlist;
 
+static int memb_list_remove_value (unsigned int *list,
+       size_t list_entries, int value)
+{
+       int j;
+       int found = 0; /* becomes 1 once value has been seen in list */
+       /* Drop value from list in place and return the resulting entry count; NOTE(review): only one occurrence is handled. */
+       for (j = 0; j < list_entries; j++) {
+               if (list[j] == value) {
+                       /* match: not copied; every later entry is shifted down one slot */
+                       found = 1;
+               }
+               else if (found) {
+                       list[j-1] = list[j]; /* close the gap left by the match */
+               }
+       }
+       if (found)
+               return (list_entries - 1); /* one entry was removed */
+       else
+               return list_entries; /* value absent: nothing written, count unchanged */
+}
+
 static void cpg_sync_init_v2 (
        const unsigned int *trans_list,
        size_t trans_list_entries,
@@ -432,6 +453,21 @@ static void cpg_sync_init_v2 (
                sizeof (unsigned int));
        my_member_list_entries = member_list_entries;
 
+       for (i = 0; i < my_old_member_list_entries; i++) {
+               found = 0;
+               for (j = 0; j < trans_list_entries; j++) {
+                       if (my_old_member_list[i] == trans_list[j]) {
+                               found = 1;
+                               break;
+                       }
+               }
+               if (found == 0) {
+                       my_member_list_entries = memb_list_remove_value (
+                               my_member_list, my_member_list_entries,
+                               my_old_member_list[i]);
+               }
+       }
+
        for (i = 0; i < my_member_list_entries; i++) {
                if (my_member_list[i] < lowest_nodeid) {
                        lowest_nodeid = my_member_list[i];
-- 
1.6.6.1


_______________________________________________
Openais mailing list
Openais@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to