On 10/17/2011 01:27 PM, MORITA Kazutaka wrote:

> At Sun, 16 Oct 2011 18:35:11 +0800,
> Liu Yuan wrote:
>>
>> From: Liu Yuan <[email protected]>
>>
>> And add a helper func to do the sanity check for cluster status.
>>
>> Signed-off-by: Liu Yuan <[email protected]>
>> ---
>>  sheep/group.c |  138 
>> +++++++++++++++++++++++++++++----------------------------
>>  1 files changed, 70 insertions(+), 68 deletions(-)
>>
>> diff --git a/sheep/group.c b/sheep/group.c
>> index eb64207..a054c22 100644
>> --- a/sheep/group.c
>> +++ b/sheep/group.c
>> @@ -526,101 +526,97 @@ err:
>>      return ret;
>>  }
>>  
>> +static int cluster_sanity_check(struct sheepdog_node_list_entry *entries,
>> +                         int nr_entries, uint64_t ctime, uint32_t epoch)
>> +{
>> +    int ret = SD_RES_SUCCESS, nr_local_entries;
>> +    struct sheepdog_node_list_entry local_entries[SD_MAX_NODES];
>> +    uint32_t lepoch;
>> +
>> +    if (sys->status == SD_STATUS_WAIT_FOR_FORMAT ||
>> +        sys->status == SD_STATUS_SHUTDOWN ||
>> +            goto out;
> 
> Syntax error: the condition ends with a dangling "||" and is missing
> its closing parenthesis.
> 
>> +    /* When the joinning node is newly created, we need to check nothing. */
>> +    if (nr_entries == 0)
>> +            goto out;
>> +
>> +    if (ctime != get_cluster_ctime()) {
>> +            ret = SD_RES_INVALID_CTIME;
>> +            goto out;
>> +    }
>> +
>> +    lepoch = get_latest_epoch();
>> +    if (epoch > lepoch) {
>> +            ret = SD_RES_OLD_NODE_VER;
>> +            goto out;
>> +    }
>> +
>> +    if (sys->status == SD_STATUS_OK)
>> +            goto out;
>> +
>> +    if (epoch < lepoch) {
>> +            ret = SD_RES_NEW_NODE_VER;
>> +            goto out;
>> +    }
>> +
>> +    nr_local_entries = epoch_log_read(epoch, (char *)local_entries,
>> +                    sizeof(local_entries));
>> +    nr_local_entries /= sizeof(local_entries[0]);
>> +
>> +    if (nr_entries != nr_local_entries ||
>> +        memcmp(entries, local_entries, sizeof(entries[0]) * nr_entries) != 
>> 0) {
>> +            ret = SD_RES_INVALID_EPOCH;
>> +            goto out;
>> +    }
>> +
>> +out:
>> +    return ret;
>> +}
>> +
>>  static int get_cluster_status(struct sheepdog_node_list_entry *from,
>>                            struct sheepdog_node_list_entry *entries,
>>                            int nr_entries, uint64_t ctime, uint32_t epoch,
>>                            uint32_t *status, uint8_t *inc_epoch)
>>  {
>> -    int i;
>> -    int nr_local_entries, nr_leave_entries;
>> +    int i, ret = SD_RES_SUCCESS;
>> +    int nr, nr_local_entries, nr_leave_entries;
>>      struct sheepdog_node_list_entry local_entries[SD_MAX_NODES];
>>      struct node *node;
>> -    uint32_t local_epoch;
>>      char str[256];
>>  
>>      *status = sys->status;
>>      if (inc_epoch)
>>              *inc_epoch = 0;
>>  
>> +    ret = cluster_sanity_check(entries, nr_entries, ctime, epoch);
>> +    if (ret)
>> +            goto out;
>> +
>>      switch (sys->status) {
>>      case SD_STATUS_OK:
>>              if (inc_epoch)
>>                      *inc_epoch = 1;
>> -
>> -            if (nr_entries == 0)
>> -                    break;
>> -
>> -            if (ctime != get_cluster_ctime()) {
>> -                    eprintf("joining node has invalid ctime, %s\n",
>> -                            addr_to_str(str, sizeof(str), from->addr, 
>> from->port));
>> -                    return SD_RES_INVALID_CTIME;
>> -            }
>> -
>> -            local_epoch = get_latest_epoch();
>> -            if (epoch > local_epoch) {
>> -                    eprintf("sheepdog is running with older epoch, 
>> %"PRIu32" %"PRIu32" %s\n",
>> -                            epoch, local_epoch,
>> -                            addr_to_str(str, sizeof(str), from->addr, 
>> from->port));
>> -                    return SD_RES_OLD_NODE_VER;
>> -            }
>>              break;
>>      case SD_STATUS_WAIT_FOR_FORMAT:
>> -            if (nr_entries != 0) {
>> -                    eprintf("joining node is not clean, %s\n",
>> -                            addr_to_str(str, sizeof(str), from->addr, 
>> from->port));
>> -                    return SD_RES_NOT_FORMATTED;
>> -            }
>> +            if (nr_entries != 0)
>> +                    ret = SD_RES_NOT_FORMATTED;
>>              break;
>>      case SD_STATUS_WAIT_FOR_JOIN:
>> -            if (ctime != get_cluster_ctime()) {
>> -                    eprintf("joining node has invalid ctime, %s\n",
>> -                            addr_to_str(str, sizeof(str), from->addr, 
>> from->port));
>> -                    return SD_RES_INVALID_CTIME;
>> -            }
>> -
>> -            local_epoch = get_latest_epoch();
>> -            if (epoch > local_epoch) {
>> -                    eprintf("sheepdog is waiting with older epoch, 
>> %"PRIu32" %"PRIu32" %s\n",
>> -                            epoch, local_epoch,
>> -                            addr_to_str(str, sizeof(str), from->addr, 
>> from->port));
>> -                    return SD_RES_OLD_NODE_VER;
>> -            } else if (epoch < local_epoch) {
>> -                    eprintf("sheepdog is waiting with newer epoch, 
>> %"PRIu32" %"PRIu32" %s\n",
>> -                            epoch, local_epoch,
>> -                            addr_to_str(str, sizeof(str), from->addr, 
>> from->port));
>> -                    return SD_RES_NEW_NODE_VER;
>> -            }
>> -
>> +            nr = get_nodes_nr_from(&sys->sd_node_list) + 1;
>>              nr_local_entries = epoch_log_read(epoch, (char *)local_entries,
>>                                                sizeof(local_entries));
>> -            nr_local_entries /= sizeof(local_entries[0]);
> 
> We can't remove this line because epoch_log_read() returns the number
> of bytes read.  Perhaps we should change epoch_log_read() to return
> the number of nodes instead?
> 


Hmm, I removed it accidentally, which shows how error-prone the current
interface is. So yes, I agree: epoch_log_read() should return the number
of nodes. I will prepare that change as part of this patch series.

Thanks,
Yuan
-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to