Sometimes we want to quickly check whether some of the vdi objects or data objects have been lost due to an unexpected issue.
Although vdi check will do, it takes a lot of time because of the large number of client-server round trips. Moreover, the probability of triggering automatic data repair is quite low, since the write path is strongly consistent. Therefore, the new option -e (--exist) only checks whether all the objects related to the vdi exist. It is fast because it submits the batched object ids only once per node. I think this is enough for the situation. Usage: dog vdi check -e <vdiname> Example: $ dog vdi check -e test test is fine, no object is missing. $ dog vdi check -e ucweb [127.0.0.1:7001] oid 80b8071d00000000 is missing. [127.0.0.1:7001] oid 00b8071d000000ee is missing. ucweb lost 2 object(s). v4 is rebased on the latest master, and: 1. the help message is updated to tell the user that vdi check -e neither compares nor repairs objects 2. the function do_obj_check is renamed to do_vdi_check_exist 3. a new command flag, SD_FLAG_CMD_FILTER, is introduced because neither the read flag nor the write flag alone is appropriate 4. the value of SD_FLAG_CMD_FILTER is changed because the original one was already taken Signed-off-by: Ruoyu <[email protected]> --- dog/vdi.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++- include/internal_proto.h | 9 ++++ include/sheep.h | 6 +++ include/sheepdog_proto.h | 1 + lib/net.c | 3 ++ sheep/ops.c | 31 +++++++++++++ 6 files changed, 163 insertions(+), 2 deletions(-) diff --git a/dog/vdi.c b/dog/vdi.c index 97ae63c..93ae763 100644 --- a/dog/vdi.c +++ b/dog/vdi.c @@ -21,6 +21,8 @@ #include "sha1.h" #include "fec.h" +struct rb_root oid_tree = RB_ROOT; + static struct sd_option vdi_options[] = { {'P', "prealloc", false, "preallocate all the data objects"}, {'n', "no-share", false, "share nothing with its parent"}, @@ -34,6 +36,8 @@ static struct sd_option vdi_options[] = { {'f', "force", false, "do operation forcibly"}, {'y', "hyper", false, "create a hyper volume"}, {'o', "oid", true, "specify the object id of the tracking object"}, + {'e', "exist", false, "only check objects 
exist or not,\n" + " neither comparing nor repairing"}, { 0, NULL, false, NULL }, }; @@ -53,6 +57,7 @@ static struct vdi_cmd_data { uint8_t store_policy; uint64_t oid; bool no_share; + bool exist; } vdi_cmd_data = { ~0, }; struct get_vdi_info { @@ -985,6 +990,106 @@ out: return ret; } +#define OIDS_INIT_LENGTH 1024 + +static void save_oid(uint64_t oid, int copies) +{ + const struct sd_vnode *vnodes[SD_MAX_COPIES]; + struct oid_entry *entry; + + oid_to_vnodes(oid, &sd_vroot, copies, vnodes); + for (int i = 0; i < copies; i++) { + struct oid_entry key = { + .node = (struct sd_node *) vnodes[i]->node + }; + entry = rb_search(&oid_tree, &key, rb, oid_entry_cmp); + if (!entry) + panic("rb_search() failure."); + + if (entry->last >= entry->end) { + entry->end *= 2; + entry->oids = xrealloc(entry->oids, + sizeof(uint64_t) * entry->end); + } + entry->oids[entry->last] = oid; + entry->last++; + } +} + +static void build_oid_tree(const struct sd_inode *inode) +{ + uint32_t max_idx, vid; + uint64_t oid; + struct sd_node *node; + struct oid_entry *entry; + int copies = min((int)inode->nr_copies, sd_zones_nr); + + rb_for_each_entry(node, &sd_nroot, rb) { + entry = xmalloc(sizeof(*entry)); + entry->node = node; + entry->oids = xmalloc(sizeof(uint64_t) * OIDS_INIT_LENGTH); + entry->end = OIDS_INIT_LENGTH; + entry->last = 0; + rb_insert(&oid_tree, entry, rb, oid_entry_cmp); + } + + save_oid(vid_to_vdi_oid(inode->vdi_id), copies); + max_idx = count_data_objs(inode); + for (uint32_t idx = 0; idx < max_idx; idx++) { + vid = sd_inode_get_vid(inode, idx); + if (vid == 0) + continue; + oid = vid_to_data_oid(vid, idx); + save_oid(oid, copies); + } +} + +static void destroy_oid_tree(void) +{ + struct oid_entry *entry; + + rb_for_each_entry(entry, &oid_tree, rb) + free(entry->oids); + rb_destroy(&oid_tree, struct oid_entry, rb); +} + +static int do_vdi_check_exist(const struct sd_inode *inode) +{ + int total = 0; + struct oid_entry *entry; + struct sd_req hdr; + struct sd_rsp *rsp = 
(struct sd_rsp *)&hdr; + + build_oid_tree(inode); + + rb_for_each_entry(entry, &oid_tree, rb) { + sd_init_req(&hdr, SD_OP_OIDS_EXIST); + hdr.data_length = sizeof(uint64_t) * entry->last; + hdr.flags = SD_FLAG_CMD_FILTER; + int ret = dog_exec_req(&entry->node->nid, &hdr, entry->oids); + if (ret < 0) + panic("dog_exec_req() failure."); + + int n = rsp->data_length / sizeof(uint64_t); + total += n; + for (int i = 0; i < n; i++) + printf("[%s] oid %016"PRIx64" is missing.\n", + addr_to_str(entry->node->nid.addr, + entry->node->nid.port), + entry->oids[i]); + } + + destroy_oid_tree(); + + if (total == 0) { + printf("%s is fine, no object is missing.\n", inode->name); + return EXIT_SUCCESS; + } else { + printf("%s lost %d object(s).\n", inode->name, total); + return EXIT_FAILURE; + } +} + static int do_track_object(uint64_t oid, uint8_t nr_copies) { int i, j, ret; @@ -1873,7 +1978,10 @@ static int vdi_check(int argc, char **argv) goto out; } - ret = do_vdi_check(inode); + if (vdi_cmd_data.exist) + ret = do_vdi_check_exist(inode); + else + ret = do_vdi_check(inode); out: free(inode); return ret; @@ -2591,7 +2699,7 @@ static int vdi_alter_copy(int argc, char **argv) } static struct subcommand vdi_cmd[] = { - {"check", "<vdiname>", "sapht", "check and repair image's consistency", + {"check", "<vdiname>", "seapht", "check and repair image's consistency", NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_check, vdi_options}, {"create", "<vdiname> <size>", "Pycaphrvt", "create an image", @@ -2735,6 +2843,9 @@ static int vdi_parser(int ch, const char *opt) exit(EXIT_FAILURE); } break; + case 'e': + vdi_cmd_data.exist = true; + break; } return 0; diff --git a/include/internal_proto.h b/include/internal_proto.h index 2affc42..37afb46 100644 --- a/include/internal_proto.h +++ b/include/internal_proto.h @@ -107,6 +107,7 @@ #define SD_OP_PREVENT_INODE_UPDATE 0xC3 #define SD_OP_ALLOW_INODE_UPDATE 0xC4 #define SD_OP_REPAIR_REPLICA 0xC5 +#define SD_OP_OIDS_EXIST 0xC6 /* internal flags for 
hdr.flags, must be above 0x80 */ #define SD_FLAG_CMD_RECOVERY 0x0080 @@ -180,6 +181,14 @@ struct sd_node { #endif }; +struct oid_entry { + struct rb_node rb; + struct sd_node *node; /* key */ + uint64_t *oids; /* object id array */ + int end; /* idx to the end of the allocated oid array */ + int last; /* idx to the last element of the oid array */ +}; + /* * A joining sheep multicasts the local cluster info. Then, the existing nodes * reply the latest cluster info which is unique among all of the nodes. diff --git a/include/sheep.h b/include/sheep.h index e062372..5b136a8 100644 --- a/include/sheep.h +++ b/include/sheep.h @@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node *node1, return node_id_cmp(&node1->nid, &node2->nid); } +static inline int oid_entry_cmp(const struct oid_entry *entry1, + const struct oid_entry *entry2) +{ + return node_cmp(entry1->node, entry2->node); +} + static inline bool node_eq(const struct sd_node *a, const struct sd_node *b) { return node_cmp(a, b) == 0; diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index d6a8d35..b4e1e13 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -50,6 +50,7 @@ #define SD_FLAG_CMD_COW 0x02 #define SD_FLAG_CMD_CACHE 0x04 #define SD_FLAG_CMD_DIRECT 0x08 /* don't use object cache */ +#define SD_FLAG_CMD_FILTER 0x11 /* write & read, output is subset of input */ /* flags above 0x80 are sheepdog-internal */ #define SD_RES_SUCCESS 0x00 /* Success */ diff --git a/lib/net.c b/lib/net.c index b32e022..552e945 100644 --- a/lib/net.c +++ b/lib/net.c @@ -334,6 +334,9 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data, if (hdr->flags & SD_FLAG_CMD_WRITE) { wlen = hdr->data_length; rlen = 0; + } else if (hdr->flags & SD_FLAG_CMD_FILTER) { + wlen = hdr->data_length; + rlen = hdr->data_length; } else { wlen = 0; rlen = hdr->data_length; diff --git a/sheep/ops.c b/sheep/ops.c index dc10f0f..3d20c7d 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -1056,6 +1056,30 @@ 
static int local_oid_exist(struct request *req) return SD_RES_NO_OBJ; } +static int local_oids_exist(const struct sd_req *req, struct sd_rsp *rsp, + void *data) +{ + struct request *r = container_of(req, struct request, rq); + uint64_t *oids = (uint64_t *) data; + uint8_t ec_index; + int i, j, n = req->data_length / sizeof(uint64_t); + + for (i = 0, j = 0; i < n; i++) { + ec_index = local_ec_index(r->vinfo, oids[i]); + if (is_erasure_oid(oids[i]) && ec_index == SD_MAX_COPIES) + oids[j++] = oids[i]; + else if (!sd_store->exist(oids[i], ec_index)) + oids[j++] = oids[i]; + } + + if (j > 0) { + rsp->data_length = sizeof(uint64_t) * j; + return SD_RES_NO_OBJ; + } + + return SD_RES_SUCCESS; +} + static int local_cluster_info(const struct sd_req *req, struct sd_rsp *rsp, void *data) { @@ -1594,6 +1618,13 @@ static struct sd_op_template sd_ops[] = { .process_work = local_oid_exist, }, + [SD_OP_OIDS_EXIST] = { + .name = "OIDS_EXIST", + .type = SD_OP_TYPE_LOCAL, + .force = true, + .process_main = local_oids_exist, + }, + [SD_OP_CLUSTER_INFO] = { .name = "CLUSTER INFO", .type = SD_OP_TYPE_LOCAL, -- 1.8.3.2 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
