The commit is pushed to "branch-rh7-3.10.0-327.10.1.vz7.12.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-327.10.1.vz7.12.16 ------> commit 3ba853c4b37a22fd7198d6a0489cfa4c25c05ac3 Author: Maxim Patlasov <mpatla...@virtuozzo.com> Date: Mon May 2 18:40:40 2016 +0400
ploop: introduce pbd Patchset description: ploop: push_backup The following series implements a new feature: ploop push_backup. The idea is to suspend all incoming WRITE-requests until the userspace backup application explicitly reports that the corresponding parts of the ploop block device are "pushed" -- i.e. stored in the backup. To improve latency, the kernel ploop tells userspace about suspended requests. This lets userspace "push" the corresponding parts of the device out-of-band. After that, userspace may tell the kernel to re-schedule those requests. https://jira.sw.ru/browse/PSBM-45000 Maxim Patlasov (4): ploop: introduce pbd ploop: implement PLOOP_IOC_PUSH_BACKUP_IO ploop: wire push_backup into state-machine ploop: push_backup cleanup ================================= This patch description: The patch introduces the push_backup descriptor ("pbd") and a few simple functions to create and release it. Userspace can control it via two new ioctls: PLOOP_IOC_PUSH_BACKUP_INIT and PLOOP_IOC_PUSH_BACKUP_STOP. Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com> Acked-by: Dmitry Monakhov <dmonak...@openvz.org> --- drivers/block/ploop/Makefile | 2 +- drivers/block/ploop/dev.c | 89 +++++++++++++ drivers/block/ploop/push_backup.c | 271 ++++++++++++++++++++++++++++++++++++++ drivers/block/ploop/push_backup.h | 8 ++ include/linux/ploop/ploop.h | 3 + include/linux/ploop/ploop_if.h | 19 +++ 6 files changed, 391 insertions(+), 1 deletion(-) diff --git a/drivers/block/ploop/Makefile b/drivers/block/ploop/Makefile index e36a027..0fecf16 100644 --- a/drivers/block/ploop/Makefile +++ b/drivers/block/ploop/Makefile @@ -5,7 +5,7 @@ CFLAGS_io_direct.o = -I$(src) CFLAGS_ploop_events.o = -I$(src) obj-$(CONFIG_BLK_DEV_PLOOP) += ploop.o -ploop-objs := dev.o map.o io.o sysfs.o tracker.o freeblks.o ploop_events.o discard.o +ploop-objs := dev.o map.o io.o sysfs.o tracker.o freeblks.o ploop_events.o discard.o push_backup.o obj-$(CONFIG_BLK_DEV_PLOOP) += pfmt_ploop1.o pfmt_ploop1-objs := fmt_ploop1.o diff --git 
a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 1da073c..23da9f5 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -19,6 +19,7 @@ #include "ploop_events.h" #include "freeblks.h" #include "discard.h" +#include "push_backup.h" /* Structures and terms: * @@ -3766,6 +3767,9 @@ static int ploop_stop(struct ploop_device * plo, struct block_device *bdev) return -EBUSY; } + clear_bit(PLOOP_S_PUSH_BACKUP, &plo->state); + ploop_pb_stop(plo->pbd); + for (p = plo->disk->minors - 1; p > 0; p--) invalidate_partition(plo->disk, p); invalidate_partition(plo->disk, 0); @@ -3892,6 +3896,7 @@ static int ploop_clear(struct ploop_device * plo, struct block_device * bdev) } ploop_fb_fini(plo->fbd, 0); + ploop_pb_fini(plo->pbd); plo->maintenance_type = PLOOP_MNTN_OFF; plo->bd_size = 0; @@ -4477,6 +4482,84 @@ static int ploop_getdevice_ioc(unsigned long arg) return err; } +static int ploop_push_backup_init(struct ploop_device *plo, unsigned long arg) +{ + struct ploop_push_backup_init_ctl ctl; + struct ploop_pushbackup_desc *pbd = NULL; + int rc = 0; + + if (list_empty(&plo->map.delta_list)) + return -ENOENT; + + if (plo->maintenance_type != PLOOP_MNTN_OFF) + return -EINVAL; + + BUG_ON(plo->pbd); + + if (copy_from_user(&ctl, (void*)arg, sizeof(ctl))) + return -EFAULT; + + pbd = ploop_pb_alloc(plo); + if (!pbd) { + rc = -ENOMEM; + goto pb_init_done; + } + + ploop_quiesce(plo); + + rc = ploop_pb_init(pbd, ctl.cbt_uuid, !ctl.cbt_mask_addr); + if (rc) { + ploop_relax(plo); + goto pb_init_done; + } + + plo->pbd = pbd; + + atomic_set(&plo->maintenance_cnt, 0); + plo->maintenance_type = PLOOP_MNTN_PUSH_BACKUP; + set_bit(PLOOP_S_PUSH_BACKUP, &plo->state); + + ploop_relax(plo); + + if (ctl.cbt_mask_addr) + rc = ploop_pb_copy_cbt_to_user(pbd, (char *)ctl.cbt_mask_addr); +pb_init_done: + if (rc) + ploop_pb_fini(pbd); + return rc; +} + +static int ploop_push_backup_stop(struct ploop_device *plo, unsigned long arg) +{ + struct ploop_pushbackup_desc *pbd = 
plo->pbd; + struct ploop_push_backup_stop_ctl ctl; + + if (plo->maintenance_type != PLOOP_MNTN_PUSH_BACKUP) + return -EINVAL; + + if (copy_from_user(&ctl, (void*)arg, sizeof(ctl))) + return -EFAULT; + + if (pbd && ploop_pb_check_uuid(pbd, ctl.cbt_uuid)) { + printk("ploop(%d): PUSH_BACKUP_STOP uuid mismatch\n", + plo->index); + return -EINVAL; + } + + if (!test_and_clear_bit(PLOOP_S_PUSH_BACKUP, &plo->state)) + return -EINVAL; + + BUG_ON (!pbd); + ctl.status = ploop_pb_stop(pbd); + + ploop_quiesce(plo); + ploop_pb_fini(plo->pbd); + plo->maintenance_type = PLOOP_MNTN_OFF; + ploop_relax(plo); + + return 0; +} + static int ploop_ioctl(struct block_device *bdev, fmode_t fmode, unsigned int cmd, unsigned long arg) { @@ -4581,6 +4664,12 @@ static int ploop_ioctl(struct block_device *bdev, fmode_t fmode, unsigned int cm case PLOOP_IOC_MAX_DELTA_SIZE: err = ploop_set_max_delta_size(plo, arg); break; + case PLOOP_IOC_PUSH_BACKUP_INIT: + err = ploop_push_backup_init(plo, arg); + break; + case PLOOP_IOC_PUSH_BACKUP_STOP: + err = ploop_push_backup_stop(plo, arg); + break; default: err = -EINVAL; } diff --git a/drivers/block/ploop/push_backup.c b/drivers/block/ploop/push_backup.c new file mode 100644 index 0000000..ecc9862 --- /dev/null +++ b/drivers/block/ploop/push_backup.c @@ -0,0 +1,271 @@ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/bio.h> +#include <linux/interrupt.h> +#include <linux/buffer_head.h> +#include <linux/kthread.h> + +#include <trace/events/block.h> + +#include <linux/ploop/ploop.h> +#include "push_backup.h" + +#define NR_PAGES(bits) (((bits) + PAGE_SIZE*8 - 1) / (PAGE_SIZE*8)) +#define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) + +struct ploop_pushbackup_desc { + struct ploop_device *plo; + struct page **cbt_map; /* a 'snapshot' copy of CBT mask */ + blkcnt_t cbt_block_max; + blkcnt_t cbt_block_bits; + __u8 cbt_uuid[16]; + + struct page **ppb_map; /* Ploop 
Push Backup mask */ + cluster_t ppb_block_max; /* first invalid index in ppb_map */ + cluster_t ppb_offset; /* [0, ppb_offset) is ACKed by userspace */ + + spinlock_t ppb_lock; + struct completion ppb_comp; + bool ppb_waiting; + + + struct rb_root pending_tree; + struct rb_root reported_tree; +}; + +int ploop_pb_check_uuid(struct ploop_pushbackup_desc *pbd, __u8 *uuid) +{ + if (memcmp(pbd->cbt_uuid, uuid, sizeof(pbd->cbt_uuid))) + return -1; + return 0; +} + +struct ploop_pushbackup_desc *ploop_pb_alloc(struct ploop_device *plo) +{ + struct ploop_pushbackup_desc *pbd; + int i, npages; + + pbd = kmalloc(sizeof(struct ploop_pushbackup_desc), GFP_KERNEL|__GFP_ZERO); + if (pbd == NULL) + return NULL; + + pbd->ppb_block_max = (plo->bd_size + (1 << plo->cluster_log) - 1) + >> plo->cluster_log; + npages = NR_PAGES(pbd->ppb_block_max); + + pbd->ppb_map = vmalloc(npages * sizeof(void *)); + if (!pbd->ppb_map) { + kfree(pbd); + return NULL; + } + + memset(pbd->ppb_map, 0, npages * sizeof(void *)); + + for (i = 0; i < npages; i++) { + pbd->ppb_map[i] = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!pbd->ppb_map[i]) { + while (--i >= 0) + __free_page(pbd->ppb_map[i]); + vfree(pbd->ppb_map); + kfree(pbd); + return NULL; + } + } + + spin_lock_init(&pbd->ppb_lock); + init_completion(&pbd->ppb_comp); + pbd->pending_tree = RB_ROOT; + pbd->reported_tree = RB_ROOT; + pbd->plo = plo; + + return pbd; +} + +static int find_first_blk_in_map(struct page **map, u64 map_max, u64 *blk_p) +{ + u64 blk = *blk_p; + unsigned long idx = blk >> (PAGE_SHIFT + 3); + + while (blk < map_max) { + unsigned long off = blk & (BITS_PER_PAGE -1); + unsigned long next_bit; + struct page *page = map[idx]; + + if (!page) + goto next; + + next_bit = find_next_bit(page_address(page), BITS_PER_PAGE, off); + if (next_bit != BITS_PER_PAGE) { + *blk_p = ((u64)idx << (PAGE_SHIFT + 3)) + next_bit; + return 0; + } + + next: + idx++; + blk = (u64)idx << (PAGE_SHIFT + 3); + } + + return -1; +} + +enum { + SET_BIT, + 
CLEAR_BIT, + CHECK_BIT, +}; + +static bool do_bit_in_map(struct page **map, u64 map_max, u64 blk, int action) +{ + unsigned long idx = blk >> (PAGE_SHIFT + 3); + unsigned long off = blk & (BITS_PER_PAGE -1); + struct page *page = map[idx]; + + BUG_ON(blk >= map_max); + + switch (action) { + case SET_BIT: + __set_bit(off, page_address(page)); + break; + case CLEAR_BIT: + __clear_bit(off, page_address(page)); + break; + case CHECK_BIT: + return test_bit(off, page_address(page)); + default: + BUG(); + } + + return false; +} + +static void set_bit_in_map(struct page **map, u64 map_max, u64 blk) +{ + do_bit_in_map(map, map_max, blk, SET_BIT); +} + +static int convert_map_to_map(struct ploop_pushbackup_desc *pbd) +{ + struct page **from_map = pbd->cbt_map; + blkcnt_t from_max = pbd->cbt_block_max - 1; + blkcnt_t from_bits = pbd->cbt_block_bits; + + struct page **to_map = pbd->ppb_map; + cluster_t to_max = pbd->ppb_block_max; + int to_bits = pbd->plo->cluster_log + 9; + + u64 from_blk, to_blk; + + if ((u64)from_max << from_bits != (u64)to_max << to_bits) { + printk("mismatch in map convert: %lu %lu ---> %u %d\n", + from_max, from_bits, to_max, to_bits); + return -EINVAL; + } + + for (from_blk = 0; from_blk < from_max; + from_blk = (++to_blk << to_bits) >> from_bits) { + + if (find_first_blk_in_map(from_map, from_max, &from_blk)) + break; + + to_blk = (from_blk << from_bits) >> to_bits; + set_bit_in_map(to_map, to_max, to_blk); + } + + return 0; + +} + +int ploop_pb_init(struct ploop_pushbackup_desc *pbd, __u8 *uuid, bool full) +{ + int rc; + + memcpy(pbd->cbt_uuid, uuid, sizeof(pbd->cbt_uuid)); + + if (full) { + int i; + for (i = 0; i < NR_PAGES(pbd->ppb_block_max); i++) + memset(page_address(pbd->ppb_map[i]), 0xff, PAGE_SIZE); + return 0; + } + + rc = blk_cbt_map_copy_once(pbd->plo->queue, + uuid, + &pbd->cbt_map, + &pbd->cbt_block_max, + &pbd->cbt_block_bits); + if (rc) + return rc; + + return convert_map_to_map(pbd); +} + +static void ploop_pb_free_cbt_map(struct 
ploop_pushbackup_desc *pbd) +{ + if (pbd->cbt_map) { + unsigned long i; + for (i = 0; i < NR_PAGES(pbd->cbt_block_max); i++) + if (pbd->cbt_map[i]) + __free_page(pbd->cbt_map[i]); + + vfree(pbd->cbt_map); + pbd->cbt_map = NULL; + } +} + +void ploop_pb_fini(struct ploop_pushbackup_desc *pbd) +{ + int i; + + if (pbd == NULL) + return; + + if (!RB_EMPTY_ROOT(&pbd->pending_tree)) + printk("ploop_pb_fini: pending_tree is not empty!\n"); + if (!RB_EMPTY_ROOT(&pbd->reported_tree)) + printk("ploop_pb_fini: reported_tree is not empty!\n"); + + if (pbd->plo) + pbd->plo->pbd = NULL; + + ploop_pb_free_cbt_map(pbd); + + for (i = 0; i < NR_PAGES(pbd->ppb_block_max); i++) + __free_page(pbd->ppb_map[i]); + + vfree(pbd->ppb_map); + kfree(pbd); +} + +int ploop_pb_copy_cbt_to_user(struct ploop_pushbackup_desc *pbd, char *user_addr) +{ + unsigned long i; + + for (i = 0; i < NR_PAGES(pbd->cbt_block_max); i++) { + struct page *page = pbd->cbt_map[i] ? : ZERO_PAGE(0); + + if (copy_to_user(user_addr, page_address(page), PAGE_SIZE)) + return -EFAULT; + + user_addr += PAGE_SIZE; + } + + ploop_pb_free_cbt_map(pbd); + return 0; +} + +unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd) +{ + if (pbd == NULL) + return 0; + + spin_lock(&pbd->ppb_lock); + + if (pbd->ppb_waiting) + complete(&pbd->ppb_comp); + spin_unlock(&pbd->ppb_lock); + + return 0; +} diff --git a/drivers/block/ploop/push_backup.h b/drivers/block/ploop/push_backup.h new file mode 100644 index 0000000..40d23f5 --- /dev/null +++ b/drivers/block/ploop/push_backup.h @@ -0,0 +1,8 @@ +struct ploop_pushbackup_desc; + +struct ploop_pushbackup_desc *ploop_pb_alloc(struct ploop_device *plo); +int ploop_pb_init(struct ploop_pushbackup_desc *pbd, __u8 *uuid, bool full); +void ploop_pb_fini(struct ploop_pushbackup_desc *pbd); +int ploop_pb_copy_cbt_to_user(struct ploop_pushbackup_desc *pbd, char *user_addr); +unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd); +int ploop_pb_check_uuid(struct ploop_pushbackup_desc *pbd, 
__u8 *uuid); diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h index c9fb1b0..09f419d3 100644 --- a/include/linux/ploop/ploop.h +++ b/include/linux/ploop/ploop.h @@ -53,6 +53,7 @@ enum { PLOOP_S_LOCKED, /* ploop is locked by userspace (for minor mgmt only) */ PLOOP_S_ONCE, /* An event (e.g. printk once) happened */ + PLOOP_S_PUSH_BACKUP, /* Push_backup is in progress */ }; struct ploop_snapdata @@ -337,6 +338,7 @@ struct ploop_stats }; struct ploop_freeblks_desc; +struct ploop_pushbackup_desc; struct ploop_device { @@ -438,6 +440,7 @@ struct ploop_device char cookie[PLOOP_COOKIE_SIZE]; struct ploop_freeblks_desc *fbd; + struct ploop_pushbackup_desc *pbd; unsigned long locking_state; /* plo locked by userspace */ }; diff --git a/include/linux/ploop/ploop_if.h b/include/linux/ploop/ploop_if.h index aacddb3..83a68e5 100644 --- a/include/linux/ploop/ploop_if.h +++ b/include/linux/ploop/ploop_if.h @@ -186,6 +186,18 @@ struct ploop_getdevice_ctl __u32 __mbz1; } __attribute__ ((aligned (8))); +struct ploop_push_backup_init_ctl +{ + __u8 cbt_uuid[16]; + __u64 cbt_mask_addr; /* page-aligned space for CBT mask */ +} __attribute__ ((aligned (8))); + +struct ploop_push_backup_stop_ctl +{ + __u8 cbt_uuid[16]; + __u32 status; /* for sanity: non-zero if pending or active queue is not empty */ +} __attribute__ ((aligned (8))); + /* maintenance types */ enum { PLOOP_MNTN_OFF = 0, /* no maintenance is in progress */ @@ -202,6 +214,7 @@ enum { PLOOP_MNTN_MERGE, /* merge is in progress */ PLOOP_MNTN_GROW, /* grow is in progress */ PLOOP_MNTN_RELOC, /* relocation is in progress */ + PLOOP_MNTN_PUSH_BACKUP, /* push backup is in progress */ }; /* @@ -302,6 +315,12 @@ struct ploop_track_extent /* Set maximum size for the top delta . 
*/ #define PLOOP_IOC_MAX_DELTA_SIZE _IOW(PLOOPCTLTYPE, 28, __u64) +/* Start push backup */ +#define PLOOP_IOC_PUSH_BACKUP_INIT _IOR(PLOOPCTLTYPE, 29, struct ploop_push_backup_init_ctl) + +/* Stop push backup */ +#define PLOOP_IOC_PUSH_BACKUP_STOP _IOR(PLOOPCTLTYPE, 31, struct ploop_push_backup_stop_ctl) + /* Events exposed via /sys/block/ploopN/pstate/event */ #define PLOOP_EVENT_ABORTED 1 #define PLOOP_EVENT_STOPPED 2 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel