From: Hitoshi Mitake <[email protected]>

Current sheepdog doesn't support VDI locking. This patch and the succeeding ones revive the feature. With this feature, two or more clients (including QEMU and tgt) are not allowed to open the same VDI at the same time.
Signed-off-by: Hitoshi Mitake <[email protected]>
---
 sheep/group.c      |    1 +
 sheep/ops.c        |   21 +++++++++
 sheep/sheep_priv.h |    4 ++
 sheep/vdi.c        |  119 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 145 insertions(+), 0 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index 9bd6808..91fce50 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -997,6 +997,7 @@ main_fn void sd_leave_handler(const struct sd_node *left,
 	put_vnode_info(old_vnode_info);
 
 	sockfd_cache_del_node(&left->nid);
+	unlock_all_vdis(&left->nid);
 }
 
 static void update_node_size(struct sd_node *node)
diff --git a/sheep/ops.c b/sheep/ops.c
index 51880b9..1056935 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -1242,6 +1242,26 @@ static int local_allow_inode_update(const struct sd_req *req,
 	return SD_RES_SUCCESS;
 }
 
+/*
+ * process_main handler of LOCK_VDI: lock the VDI whose id is read from
+ * rsp->vdi.vdi_id on behalf of the sender.  Returns SD_RES_VDI_NOT_LOCKED
+ * when lock_vdi() refuses the request, SD_RES_SUCCESS otherwise.
+ */
+static int cluster_lock_vdi(const struct sd_req *req, struct sd_rsp *rsp,
+			    void *data, const struct sd_node *sender)
+{
+	uint32_t vid = rsp->vdi.vdi_id;
+
+	sd_info("node: %s is locking VDI: %"PRIx32, node_to_str(sender), vid);
+
+	if (!lock_vdi(vid, &sender->nid)) {
+		sd_err("locking %"PRIx32" failed", vid);
+		return SD_RES_VDI_NOT_LOCKED;
+	}
+
+	return SD_RES_SUCCESS;
+}
+
 static struct sd_op_template sd_ops[] = {
 
 	/* cluster operations */
@@ -1336,6 +1356,7 @@ static struct sd_op_template sd_ops[] = {
 		.name = "LOCK_VDI",
 		.type = SD_OP_TYPE_CLUSTER,
 		.process_work = cluster_get_vdi_info,
+		.process_main = cluster_lock_vdi,
 	},
 
 	[SD_OP_REWEIGHT] = {
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 7939667..2c106ee 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -344,6 +344,10 @@ int sd_delete_vdi(const char *name);
 int sd_lookup_vdi(const char *name, uint32_t *vid);
 int sd_create_hyper_volume(const char *name, uint32_t *vdi_id);
 
+bool lock_vdi(uint32_t vid, const struct node_id *owner);
+bool unlock_vdi(uint32_t vid, const struct node_id *owner);
+void unlock_all_vdis(const struct node_id *owner);
+
 extern int ec_max_data_strip;
 
 int read_vdis(char *data, int len, unsigned int *rsp_len);
diff --git a/sheep/vdi.c b/sheep/vdi.c
index b58b538..7c23bab 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -11,11 +11,23 @@
 
 #include "sheep_priv.h"
 
+enum lock_state {
+	LOCK_STATE_INIT,
+	LOCK_STATE_LOCKED,
+	LOCK_STATE_UNLOCKED,
+};
+
+struct vdi_lock_state {
+	enum lock_state state;
+	struct node_id owner;
+};
+
 struct vdi_state_entry {
 	uint32_t vid;
 	unsigned int nr_copies;
 	bool snapshot;
 	uint8_t copy_policy;
+	struct vdi_lock_state lock_state;
 	struct rb_node node;
 };
 
@@ -148,6 +160,9 @@ int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot, uint8_t cp)
 	entry->snapshot = snapshot;
 	entry->copy_policy = cp;
 
+	entry->lock_state.state = LOCK_STATE_INIT;
+	memset(&entry->lock_state.owner, 0, sizeof(struct node_id));
+
 	if (cp) {
 		int d;
 
@@ -222,6 +237,110 @@ out:
 	return ret;
 }
 
+/*
+ * Lock the VDI for owner.  Returns true when the lock is taken, false when
+ * the VDI has no state entry, is already locked, or is in an unknown state.
+ */
+bool lock_vdi(uint32_t vid, const struct node_id *owner)
+{
+	struct vdi_state_entry *entry;
+	bool ret = false;
+
+	sd_write_lock(&vdi_state_lock);
+
+	entry = vdi_state_search(&vdi_state_root, vid);
+	if (!entry) {
+		sd_err("no vdi state entry of %"PRIx32" found", vid);
+		ret = false;
+		goto out;
+	}
+
+	switch (entry->lock_state.state) {
+	case LOCK_STATE_INIT:
+	case LOCK_STATE_UNLOCKED:
+		entry->lock_state.state = LOCK_STATE_LOCKED;
+		memcpy(&entry->lock_state.owner, owner, sizeof(*owner));
+		sd_info("VDI %"PRIx32" is locked", vid);
+		ret = true;
+		goto out;
+	case LOCK_STATE_LOCKED:
+		sd_info("VDI %"PRIx32" is already locked", vid);
+		break;
+	default:
+		sd_alert("lock state of VDI (%"PRIx32") is unknown: %d",
+			 vid, entry->lock_state.state);
+		break;
+	}
+
+out:
+	sd_rw_unlock(&vdi_state_lock);
+	return ret;
+}
+
+/*
+ * Unlock the VDI.  Returns true only when the VDI is currently locked by
+ * owner; in every other case the lock state is left untouched.
+ */
+bool unlock_vdi(uint32_t vid, const struct node_id *owner)
+{
+	struct vdi_state_entry *entry;
+	bool ret = false;
+
+	sd_write_lock(&vdi_state_lock);
+
+	entry = vdi_state_search(&vdi_state_root, vid);
+	if (!entry) {
+		sd_err("no vdi state entry of %"PRIx32" found", vid);
+		ret = false;
+		goto out;
+	}
+
+	switch (entry->lock_state.state) {
+	case LOCK_STATE_INIT:
+	case LOCK_STATE_UNLOCKED:
+		sd_err("unlocking unlocked VDI: %"PRIx32, vid);
+		break;
+	case LOCK_STATE_LOCKED:
+		if (memcmp(&entry->lock_state.owner, owner, sizeof(*owner))) {
+			sd_err("unlocking locked by different owner");
+			ret = false;
+			goto out;
+		}
+		entry->lock_state.state = LOCK_STATE_UNLOCKED;
+		memset(&entry->lock_state.owner, 0,
+		       sizeof(entry->lock_state.owner));
+		ret = true;
+		break;
+	default:
+		sd_alert("lock state of VDI (%"PRIx32") is unknown: %d",
+			 vid, entry->lock_state.state);
+		break;
+	}
+
+out:
+	sd_rw_unlock(&vdi_state_lock);
+	return ret;
+}
+
+/* Release every VDI lock recorded for owner, e.g. when its node leaves. */
+void unlock_all_vdis(const struct node_id *owner)
+{
+	struct vdi_state_entry *entry;
+
+	sd_write_lock(&vdi_state_lock);
+
+	rb_for_each_entry(entry, &vdi_state_root, node) {
+		if (memcmp(&entry->lock_state.owner, owner, sizeof(*owner)))
+			continue;
+
+		entry->lock_state.state = LOCK_STATE_UNLOCKED;
+		memset(&entry->lock_state.owner, 0,
+		       sizeof(entry->lock_state.owner));
+	}
+
+	sd_rw_unlock(&vdi_state_lock);
+}
+
 static struct sd_inode *alloc_inode(const struct vdi_iocb *iocb,
 				    uint32_t new_snapid, uint32_t new_vid,
 				    uint32_t *data_vdi_id,
-- 
1.7.1

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog
