On 10/17/2011 01:27 PM, MORITA Kazutaka wrote: > At Sun, 16 Oct 2011 18:35:11 +0800, > Liu Yuan wrote: >> >> From: Liu Yuan <[email protected]> >> >> And add a helper func to do the sanity check for cluster status. >> >> Signed-off-by: Liu Yuan <[email protected]> >> --- >> sheep/group.c | 138 >> +++++++++++++++++++++++++++++---------------------------- >> 1 files changed, 70 insertions(+), 68 deletions(-) >> >> diff --git a/sheep/group.c b/sheep/group.c >> index eb64207..a054c22 100644 >> --- a/sheep/group.c >> +++ b/sheep/group.c >> @@ -526,101 +526,97 @@ err: >> return ret; >> } >> >> +static int cluster_sanity_check(struct sheepdog_node_list_entry *entries, >> + int nr_entries, uint64_t ctime, uint32_t epoch) >> +{ >> + int ret = SD_RES_SUCCESS, nr_local_entries; >> + struct sheepdog_node_list_entry local_entries[SD_MAX_NODES]; >> + uint32_t lepoch; >> + >> + if (sys->status == SD_STATUS_WAIT_FOR_FORMAT || >> + sys->status == SD_STATUS_SHUTDOWN || >> + goto out; > > Syntax error. > >> + /* When the joinning node is newly created, we need to check nothing. 
*/ >> + if (nr_entries == 0) >> + goto out; >> + >> + if (ctime != get_cluster_ctime()) { >> + ret = SD_RES_INVALID_CTIME; >> + goto out; >> + } >> + >> + lepoch = get_latest_epoch(); >> + if (epoch > lepoch) { >> + ret = SD_RES_OLD_NODE_VER; >> + goto out; >> + } >> + >> + if (sys->status == SD_STATUS_OK) >> + goto out; >> + >> + if (epoch < lepoch) { >> + ret = SD_RES_NEW_NODE_VER; >> + goto out; >> + } >> + >> + nr_local_entries = epoch_log_read(epoch, (char *)local_entries, >> + sizeof(local_entries)); >> + nr_local_entries /= sizeof(local_entries[0]); >> + >> + if (nr_entries != nr_local_entries || >> + memcmp(entries, local_entries, sizeof(entries[0]) * nr_entries) != >> 0) { >> + ret = SD_RES_INVALID_EPOCH; >> + goto out; >> + } >> + >> +out: >> + return ret; >> +} >> + >> static int get_cluster_status(struct sheepdog_node_list_entry *from, >> struct sheepdog_node_list_entry *entries, >> int nr_entries, uint64_t ctime, uint32_t epoch, >> uint32_t *status, uint8_t *inc_epoch) >> { >> - int i; >> - int nr_local_entries, nr_leave_entries; >> + int i, ret = SD_RES_SUCCESS; >> + int nr, nr_local_entries, nr_leave_entries; >> struct sheepdog_node_list_entry local_entries[SD_MAX_NODES]; >> struct node *node; >> - uint32_t local_epoch; >> char str[256]; >> >> *status = sys->status; >> if (inc_epoch) >> *inc_epoch = 0; >> >> + ret = cluster_sanity_check(entries, nr_entries, ctime, epoch); >> + if (ret) >> + goto out; >> + >> switch (sys->status) { >> case SD_STATUS_OK: >> if (inc_epoch) >> *inc_epoch = 1; >> - >> - if (nr_entries == 0) >> - break; >> - >> - if (ctime != get_cluster_ctime()) { >> - eprintf("joining node has invalid ctime, %s\n", >> - addr_to_str(str, sizeof(str), from->addr, >> from->port)); >> - return SD_RES_INVALID_CTIME; >> - } >> - >> - local_epoch = get_latest_epoch(); >> - if (epoch > local_epoch) { >> - eprintf("sheepdog is running with older epoch, >> %"PRIu32" %"PRIu32" %s\n", >> - epoch, local_epoch, >> - addr_to_str(str, sizeof(str), 
from->addr, >> from->port)); >> - return SD_RES_OLD_NODE_VER; >> - } >> break; >> case SD_STATUS_WAIT_FOR_FORMAT: >> - if (nr_entries != 0) { >> - eprintf("joining node is not clean, %s\n", >> - addr_to_str(str, sizeof(str), from->addr, >> from->port)); >> - return SD_RES_NOT_FORMATTED; >> - } >> + if (nr_entries != 0) >> + ret = SD_RES_NOT_FORMATTED; >> break; >> case SD_STATUS_WAIT_FOR_JOIN: >> - if (ctime != get_cluster_ctime()) { >> - eprintf("joining node has invalid ctime, %s\n", >> - addr_to_str(str, sizeof(str), from->addr, >> from->port)); >> - return SD_RES_INVALID_CTIME; >> - } >> - >> - local_epoch = get_latest_epoch(); >> - if (epoch > local_epoch) { >> - eprintf("sheepdog is waiting with older epoch, >> %"PRIu32" %"PRIu32" %s\n", >> - epoch, local_epoch, >> - addr_to_str(str, sizeof(str), from->addr, >> from->port)); >> - return SD_RES_OLD_NODE_VER; >> - } else if (epoch < local_epoch) { >> - eprintf("sheepdog is waiting with newer epoch, >> %"PRIu32" %"PRIu32" %s\n", >> - epoch, local_epoch, >> - addr_to_str(str, sizeof(str), from->addr, >> from->port)); >> - return SD_RES_NEW_NODE_VER; >> - } >> - >> + nr = get_nodes_nr_from(&sys->sd_node_list) + 1; >> nr_local_entries = epoch_log_read(epoch, (char *)local_entries, >> sizeof(local_entries)); >> - nr_local_entries /= sizeof(local_entries[0]); > > We can't remove this line because epoch_log_read() returns the number > of bytes read. Perhaps, should we change epoch_log_read() to return > the number of nodes? >
Hmm, it was removed accidentally by me, which proves that the current interface is quite error-prone. So yes, I agree: epoch_log_read() should return the number of nodes. I am going to prepare that change in this patch series. Thanks, Yuan -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
