If cluster members change frequently, especially the first two or more nodes of the cluster leave and join frequently, the following error message will be found in sheep.log
Failed to find requested tag, remote address: 127.0.0.1:7001, op name: GET_EPOCH This patch adds epoch recovery to reduce these errors. To achieve it, it is better to unify epoch file because rb_node is unused in it. And then, we can create a new epoch file as long as the file is not found in local node. Signed-off-by: Ruoyu <[email protected]> --- sheep/group.c | 7 +++++-- sheep/store.c | 11 ++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/sheep/group.c b/sheep/group.c index 27e3574..6e4f8ad 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -359,7 +359,7 @@ int epoch_log_read_remote(uint32_t epoch, struct sd_node *nodes, int len, rb_for_each_entry(node, &vinfo->nroot, rb) { struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; - int nodes_len; + int nodes_len, nr_nodes; if (node_is_local(node)) continue; @@ -382,7 +382,10 @@ int epoch_log_read_remote(uint32_t epoch, struct sd_node *nodes, int len, memcpy(timestamp, buf + nodes_len, sizeof(*timestamp)); free(buf); - return nodes_len / sizeof(struct sd_node); + nr_nodes = nodes_len / sizeof(struct sd_node); + /* epoch file is missing in local node, try to create one */ + update_epoch_log(epoch, nodes, nr_nodes); + return nr_nodes; } /* diff --git a/sheep/store.c b/sheep/store.c index 70fddb8..87913d4 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -19,7 +19,7 @@ LIST_HEAD(store_drivers); int update_epoch_log(uint32_t epoch, struct sd_node *nodes, size_t nr_nodes) { - int ret, len, nodes_len; + int ret, len, nodes_len, i; time_t t; char path[PATH_MAX], *buf; @@ -33,6 +33,15 @@ int update_epoch_log(uint32_t epoch, struct sd_node *nodes, size_t nr_nodes) memcpy(buf, nodes, nodes_len); memcpy(buf + nodes_len, &t, sizeof(time_t)); + /* + * rb_node is unused in epoch file, + * unifying it is good for epoch recovery + */ + for (i = 0; i < nr_nodes; i++) + memset(buf + i * sizeof(struct sd_node) + + offsetof(struct sd_node, rb), + '\0', sizeof(struct rb_node)); + snprintf(path, sizeof(path), "%s%08u", epoch_path, epoch); ret = atomic_create_and_write(path, buf, len, true); -- 1.8.3.2 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
