From: Yunkai Zhang <qiushu....@taobao.com>

The master sheep fills the list of joining nodes into join_message when a sheep joins.

A newly added sheep gets the joining nodes from join_message and uses them to
initialize and recalculate the all_nodes array, which will be used to generate
current_vnode_info in cluster_enable_recover().

Signed-off-by: Yunkai Zhang <qiushu....@taobao.com>
---
 include/internal_proto.h | 10 ++++++----
 sheep/group.c            | 39 +++++++++++++++++++++++++++++++--------
 sheep/ops.c              |  1 +
 3 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/include/internal_proto.h b/include/internal_proto.h
index 83d98f1..717fb79 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -194,6 +194,7 @@ struct join_message {
        uint16_t nr_nodes;
        uint16_t nr_failed_nodes;
        uint16_t nr_delayed_nodes;
+       uint16_t nr_joining_nodes;
        uint16_t cluster_flags;
        uint32_t cluster_status;
        uint32_t epoch;
@@ -204,10 +205,11 @@ struct join_message {
 
        /*
         * A joining sheep puts the local node list here, which is nr_nodes
-        * entries long.  After the master replies it will contain the list of
-        * nodes that attempted to join but failed the join process.  The
-        * number of entries in that case is nr_failed_nodes, which by
-        * defintion must be smaller than nr_nodes.
+        * entries long. After the master replies it will contain the list of
+        * nodes by following order:
+        * [ failed  nodes ]: size = nr_failed_nodes
+        * [ delayed nodes ]: size = nr_delayed_nodes
+        * [ joining nodes ]: size = nr_joining_nodes
         */
        struct sd_node nodes[];
 };
diff --git a/sheep/group.c b/sheep/group.c
index cb244f7..ad3447d 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -767,16 +767,32 @@ static void get_vdi_bitmap(struct sd_node *nodes, size_t nr_nodes)
        queue_work(sys->block_wqueue, &w->work);
 }
 
-static void prepare_recovery(struct sd_node *joined,
-                                   struct sd_node *nodes, size_t nr_nodes)
+static void prepare_recovery(struct join_message *jm,
+                                 struct sd_node *joined,
+                                 struct sd_node *nodes, size_t nr_nodes)
 {
-       int i;
+       int i, j, n, found;
 
        joining_nodes[nr_joining_nodes++] = *joined;
+
        if (!nr_all_nodes) {
-               /* exclude the newly added one */
-               for (i = 0; i < nr_nodes; i++) {
-                       if (!node_eq(nodes + i, joined))
+               /* initialize joining_nodes */
+               n = jm->nr_failed_nodes + jm->nr_delayed_nodes;
+               memcpy(&joining_nodes[nr_joining_nodes], &jm->nodes[n],
+                      jm->nr_joining_nodes * sizeof(*joining_nodes));
+               nr_joining_nodes += jm->nr_joining_nodes;
+
+               /* initialize all_nodes */
+               for (found = 0, i = 0; i < nr_nodes; i++) {
+                       /* exclude all joining nodes */
+                       for (j = 0; j < nr_joining_nodes; j++) {
+                               if (node_eq(nodes + i, joining_nodes + j)) {
+                                       found = 1;
+                                       break;
+                               }
+                       }
+
+                       if (!found)
                                all_nodes[nr_all_nodes++] = nodes[i];
                }
        }
@@ -866,7 +882,7 @@ static void update_cluster_info(struct join_message *msg,
                                start_recovery(current_vnode_info,
                                               old_vnode_info);
                        } else
-                               prepare_recovery(joined, nodes, nr_nodes);
+                               prepare_recovery(msg, joined, nodes, nr_nodes);
                }
 
                if (have_enough_zones())
@@ -1013,7 +1029,14 @@ enum cluster_join_result sd_check_join_cb(struct sd_node *joining,
            (ret == CJ_RES_SUCCESS || ret == CJ_RES_JOIN_LATER))
                format_exceptional_node_list(jm);
 
-       n = jm->nr_failed_nodes + jm->nr_delayed_nodes;
+       if (sys->disable_recovery) {
+               n = jm->nr_failed_nodes + jm->nr_delayed_nodes;
+               memcpy(&jm->nodes[n], joining_nodes,
+                      nr_joining_nodes * sizeof(*joining_nodes));
+               jm->nr_joining_nodes = nr_joining_nodes;
+       }
+
+       n = jm->nr_failed_nodes + jm->nr_delayed_nodes + jm->nr_joining_nodes;
        *opaque_len = sizeof(*jm) + n * sizeof(jm->nodes[0]);
        return ret;
 }
diff --git a/sheep/ops.c b/sheep/ops.c
index 0cddf66..b2e9c69 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -292,6 +292,7 @@ static int cluster_enable_recover(const struct sd_req *req,
                put_vnode_info(old_vnode_info);
        }
 
+       nr_all_nodes = 0;
        nr_joining_nodes = 0;
        sys->disable_recovery = 0;
        return SD_RES_SUCCESS;
-- 
1.7.11.2

-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to