From: MORITA Kazutaka <morita.kazut...@lab.ntt.co.jp>

Currently, cluster_wait_for_join_check() always returns
CJ_RES_MASTER_TRANSFER if the joining node has a newer epoch.
However, we also have to take into account that the joining node may
have a wrong epoch (e.g. because it comes from a different cluster).

Signed-off-by: MORITA Kazutaka <morita.kazut...@lab.ntt.co.jp>
---
 sheep/group.c |   29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index a78dd15..920b4a1 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -525,12 +525,6 @@ static int cluster_sanity_check(struct join_message *jm)
                return CJ_RES_FAIL;
        }
 
-       if (jm->epoch > local_epoch) {
-               sd_eprintf("joining node epoch too large: %"
-                          PRIu32 " vs %" PRIu32, jm->epoch, local_epoch);
-               return CJ_RES_FAIL;
-       }
-
        if (jm->nr_copies != local_nr_copies) {
                sd_eprintf("joining node nr_copies doesn't match: %u vs %u",
                           jm->nr_copies, local_nr_copies);
@@ -543,6 +537,21 @@ static int cluster_sanity_check(struct join_message *jm)
                return CJ_RES_FAIL;
        }
 
+       if (jm->epoch > local_epoch) {
+               if (sys->status == SD_STATUS_WAIT_FOR_JOIN) {
+                       /* The joining node will be a master */
+                       sd_eprintf("transfer mastership (%d, %d)", jm->epoch,
+                                  local_epoch);
+                       return CJ_RES_MASTER_TRANSFER;
+               } else {
+                       /* Something goes wrong with sheepdog */
+                       sd_printf(SDOG_ALERT, "joining node epoch too large: %"
+                                  PRIu32 " vs %" PRIu32, jm->epoch,
+                                  local_epoch);
+                       return CJ_RES_FAIL;
+               }
+       }
+
        return CJ_RES_SUCCESS;
 }
 
@@ -559,14 +568,8 @@ static int cluster_wait_for_join_check(const struct sd_node *joined,
                return CJ_RES_JOIN_LATER;
 
        ret = cluster_sanity_check(jm);
-       if (ret != CJ_RES_SUCCESS)  {
-               if (jm->epoch > sys->epoch) {
-                       sd_eprintf("transfer mastership (%d, %d)", jm->epoch,
-                                  sys->epoch);
-                       return CJ_RES_MASTER_TRANSFER;
-               }
+       if (ret != CJ_RES_SUCCESS)
                return ret;
-       }
 
        nr_local_entries = epoch_log_read(jm->epoch, local_entries,
                                          sizeof(local_entries));
-- 
1.7.9.5

-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to