The commit is pushed to "branch-rh7-3.10.0-327.10.1.vz7.12.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-327.10.1.vz7.12.16 ------> commit adcff732cabc43be32649055afd8a1aed41c63d9 Author: Maxim Patlasov <mpatla...@virtuozzo.com> Date: Mon May 2 18:40:40 2016 +0400
ploop: implement PLOOP_IOC_PUSH_BACKUP_IO The ioctl(PLOOP_IOC_PUSH_BACKUP_IO) has two modes of operation: 1) ctl.direction=PLOOP_READ tells userspace which cluster-blocks to push out-of-band; moves processed preq-s from pending_tree to reported_tree 2) ctl.direction=PLOOP_WRITE tells kernel which cluster-blocks were pushed -- they are either ordinarily processed preq-s or out-of-band ones; the kernel matches the blocks to preq-s in reported_tree and re-schedules them. Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com> Acked-by: Dmitry Monakhov <dmonak...@openvz.org> --- drivers/block/ploop/dev.c | 105 ++++++++++++++++++++ drivers/block/ploop/push_backup.c | 197 ++++++++++++++++++++++++++++++++++++++ drivers/block/ploop/push_backup.h | 5 + include/linux/ploop/ploop_if.h | 23 +++++ 4 files changed, 330 insertions(+) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 23da9f5..2a77d2e 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -4529,6 +4529,108 @@ pb_init_done: return rc; } +static int ploop_push_backup_io_read(struct ploop_device *plo, unsigned long arg, + struct ploop_push_backup_io_ctl *ctl) +{ + struct ploop_push_backup_ctl_extent *e; + unsigned n_extents = 0; + int rc = 0; + + e = kmalloc(sizeof(*e) * ctl->n_extents, GFP_KERNEL); + if (!e) + return -ENOMEM; + + while (n_extents < ctl->n_extents) { + cluster_t clu, len; + rc = ploop_pb_get_pending(plo->pbd, &clu, &len, n_extents); + if (rc) + goto io_read_done; + + e[n_extents].clu = clu; + e[n_extents].len = len; + n_extents++; + } + + rc = -EFAULT; + ctl->n_extents = n_extents; + if (copy_to_user((void*)arg, ctl, sizeof(*ctl))) + goto io_read_done; + if (n_extents && + copy_to_user((void*)(arg + sizeof(*ctl)), e, + n_extents * sizeof(*e))) + goto io_read_done; + rc = 0; + +io_read_done: + kfree(e); + return rc; +} + +static int ploop_push_backup_io_write(struct ploop_device *plo, unsigned long arg, + struct ploop_push_backup_io_ctl *ctl) +{ + struct 
ploop_push_backup_ctl_extent *e; + unsigned i; + int rc = 0; + + e = kmalloc(sizeof(*e) * ctl->n_extents, GFP_KERNEL); + if (!e) + return -ENOMEM; + + rc = -EFAULT; + if (copy_from_user(e, (void*)(arg + sizeof(*ctl)), + ctl->n_extents * sizeof(*e))) + goto io_write_done; + + rc = 0; + for (i = 0; i < ctl->n_extents; i++) { + cluster_t j; + for (j = e[i].clu; j < e[i].clu + e[i].len; j++) + ploop_pb_put_reported(plo->pbd, j, 1); + /* OPTIMIZE ME LATER: like this: + * ploop_pb_put_reported(plo->pbd, e[i].clu, e[i].len); */ + } + +io_write_done: + kfree(e); + return rc; +} + +static int ploop_push_backup_io(struct ploop_device *plo, unsigned long arg) +{ + struct ploop_push_backup_io_ctl ctl; + struct ploop_pushbackup_desc *pbd = plo->pbd; + + if (list_empty(&plo->map.delta_list)) + return -ENOENT; + + if (plo->maintenance_type != PLOOP_MNTN_PUSH_BACKUP) + return -EINVAL; + + BUG_ON (!pbd); + + if (copy_from_user(&ctl, (void*)arg, sizeof(ctl))) + return -EFAULT; + + if (!ctl.n_extents) + return -EINVAL; + + if (ploop_pb_check_uuid(pbd, ctl.cbt_uuid)) { + printk("ploop(%d): PUSH_BACKUP_IO uuid mismatch\n", + plo->index); + return -EINVAL; + } + + switch(ctl.direction) { + case PLOOP_READ: + return ploop_push_backup_io_read(plo, arg, &ctl); + case PLOOP_WRITE: + return ploop_push_backup_io_write(plo, arg, &ctl); + } + + return -EINVAL; +} + static int ploop_push_backup_stop(struct ploop_device *plo, unsigned long arg) { struct ploop_pushbackup_desc *pbd = plo->pbd; @@ -4667,6 +4769,9 @@ static int ploop_ioctl(struct block_device *bdev, fmode_t fmode, unsigned int cm case PLOOP_IOC_PUSH_BACKUP_INIT: err = ploop_push_backup_init(plo, arg); break; + case PLOOP_IOC_PUSH_BACKUP_IO: + err = ploop_push_backup_io(plo, arg); + break; case PLOOP_IOC_PUSH_BACKUP_STOP: err = ploop_push_backup_stop(plo, arg); break; diff --git a/drivers/block/ploop/push_backup.c b/drivers/block/ploop/push_backup.c index ecc9862..477caf7 100644 --- a/drivers/block/ploop/push_backup.c +++ 
b/drivers/block/ploop/push_backup.c @@ -256,6 +256,89 @@ int ploop_pb_copy_cbt_to_user(struct ploop_pushbackup_desc *pbd, char *user_addr return 0; } +static void ploop_pb_add_req_to_tree(struct ploop_request *preq, + struct rb_root *tree) +{ + struct rb_node ** p = &tree->rb_node; + struct rb_node *parent = NULL; + struct ploop_request * pr; + + while (*p) { + parent = *p; + pr = rb_entry(parent, struct ploop_request, reloc_link); + BUG_ON (preq->req_cluster == pr->req_cluster); + + if (preq->req_cluster < pr->req_cluster) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&preq->reloc_link, parent, p); + rb_insert_color(&preq->reloc_link, tree); +} + +static void ploop_pb_add_req_to_reported(struct ploop_pushbackup_desc *pbd, + struct ploop_request *preq) +{ + ploop_pb_add_req_to_tree(preq, &pbd->reported_tree); +} + +static struct ploop_request *ploop_pb_get_req_from_tree(struct rb_root *tree, + cluster_t clu) +{ + struct rb_node *n = tree->rb_node; + struct ploop_request *p; + + while (n) { + p = rb_entry(n, struct ploop_request, reloc_link); + + if (clu < p->req_cluster) + n = n->rb_left; + else if (clu > p->req_cluster) + n = n->rb_right; + else { + rb_erase(&p->reloc_link, tree); + return p; + } + } + return NULL; +} + +static struct ploop_request * +ploop_pb_get_first_req_from_tree(struct rb_root *tree) +{ + static struct ploop_request *p; + struct rb_node *n = rb_first(tree); + + if (!n) + return NULL; + + p = rb_entry(n, struct ploop_request, reloc_link); + rb_erase(&p->reloc_link, tree); + return p; +} + +static struct ploop_request * +ploop_pb_get_first_req_from_pending(struct ploop_pushbackup_desc *pbd) +{ + return ploop_pb_get_first_req_from_tree(&pbd->pending_tree); +} + +static struct ploop_request * +ploop_pb_get_req_from_pending(struct ploop_pushbackup_desc *pbd, + cluster_t clu) +{ + return ploop_pb_get_req_from_tree(&pbd->pending_tree, clu); +} + +static struct ploop_request * +ploop_pb_get_req_from_reported(struct 
ploop_pushbackup_desc *pbd, + cluster_t clu) +{ + return ploop_pb_get_req_from_tree(&pbd->reported_tree, clu); +} + unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd) { if (pbd == NULL) @@ -269,3 +352,117 @@ unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd) return 0; } + +int ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd, + cluster_t *clu_p, cluster_t *len_p, unsigned n_done) +{ + bool blocking = !n_done; + struct ploop_request *preq; + int err = 0; + + spin_lock(&pbd->ppb_lock); + + /* OPTIMIZE ME LATER: rb_first() once, then rb_next() */ + preq = ploop_pb_get_first_req_from_pending(pbd); + if (!preq) { + struct ploop_device *plo = pbd->plo; + + if (!blocking) { + err = -ENOENT; + goto get_pending_unlock; + } + + /* blocking case */ + pbd->ppb_waiting = true; + spin_unlock(&pbd->ppb_lock); + + mutex_unlock(&plo->ctl_mutex); + err = wait_for_completion_interruptible(&pbd->ppb_comp); + mutex_lock(&plo->ctl_mutex); + + if (plo->pbd != pbd) + return -EINTR; + + spin_lock(&pbd->ppb_lock); + pbd->ppb_waiting = false; + init_completion(&pbd->ppb_comp); + + preq = ploop_pb_get_first_req_from_pending(pbd); + if (!preq) { + if (!test_bit(PLOOP_S_PUSH_BACKUP, &plo->state)) + err = -EINTR; + else if (signal_pending(current)) + err = -ERESTARTSYS; + else err = -ENOENT; + + goto get_pending_unlock; + } + } + + ploop_pb_add_req_to_reported(pbd, preq); + + *clu_p = preq->req_cluster; + *len_p = 1; + +get_pending_unlock: + spin_unlock(&pbd->ppb_lock); + return err; +} + +void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd, + cluster_t clu, cluster_t len) +{ + struct ploop_request *preq; + int n_found = 0; + + /* OPTIMIZE ME LATER: find leftmost item for [clu, clu+len), + * then rb_next() while req_cluster < clu+len. 
+ * Do this firstly for reported, then for pending */ + BUG_ON(len != 1); + + spin_lock(&pbd->ppb_lock); + + preq = ploop_pb_get_req_from_reported(pbd, clu); + if (!preq) + preq = ploop_pb_get_req_from_pending(pbd, clu); + else + n_found++; + + /* + * If preq not found above, it's unsolicited report. Then it's + * enough to have corresponding bit set in reported_map because if + * any WRITE-request comes afterwards, ploop_pb_preq_add_pending() + * fails and ploop_thread will clear corresponding bit in ppb_map + * -- see "push_backup special processing" in ploop_entry_request() + * for details. + */ + + /* + * "If .. else if .." below will be fully reworked when switching + * from pbd->ppb_offset to pbd->reported_map. All we need here is + * actaully simply to set bits corresponding to [clu, clu+len) in + * pbd->reported_map. + */ + if (pbd->ppb_offset >= clu) { /* lucky strike */ + if (clu + len > pbd->ppb_offset) { + pbd->ppb_offset = clu + len; + } + } else if (n_found != len) { /* a hole, bad luck */ + printk("ploop: push_backup ERR: off=%u ext=[%u, %u) found %d\n", + pbd->ppb_offset, clu, clu + len, n_found); + } + + spin_unlock(&pbd->ppb_lock); + + if (preq) { + struct ploop_device *plo = preq->plo; + BUG_ON(preq->req_cluster != clu); + BUG_ON(plo != pbd->plo); + + spin_lock_irq(&plo->lock); + list_add_tail(&preq->list, &plo->ready_queue); + if (test_bit(PLOOP_S_WAIT_PROCESS, &plo->state)) + wake_up_interruptible(&plo->waitq); + spin_unlock_irq(&plo->lock); + } +} diff --git a/drivers/block/ploop/push_backup.h b/drivers/block/ploop/push_backup.h index 40d23f5..482e070 100644 --- a/drivers/block/ploop/push_backup.h +++ b/drivers/block/ploop/push_backup.h @@ -6,3 +6,8 @@ void ploop_pb_fini(struct ploop_pushbackup_desc *pbd); int ploop_pb_copy_cbt_to_user(struct ploop_pushbackup_desc *pbd, char *user_addr); unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd); int ploop_pb_check_uuid(struct ploop_pushbackup_desc *pbd, __u8 *uuid); + +int 
ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd, + cluster_t *clu_p, cluster_t *len_p, unsigned n_done); +void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd, + cluster_t clu, cluster_t len); diff --git a/include/linux/ploop/ploop_if.h b/include/linux/ploop/ploop_if.h index 83a68e5..81cc8d1 100644 --- a/include/linux/ploop/ploop_if.h +++ b/include/linux/ploop/ploop_if.h @@ -192,6 +192,26 @@ struct ploop_push_backup_init_ctl __u64 cbt_mask_addr; /* page-aligned space for CBT mask */ } __attribute__ ((aligned (8))); +struct ploop_push_backup_ctl_extent +{ + __u32 clu; + __u32 len; +} __attribute__ ((aligned (8))); + +/* ploop_push_backup_io_ctl.direction */ +enum { + PLOOP_READ = 0, /* wait for requests */ + PLOOP_WRITE, /* ACK requests */ +}; + +struct ploop_push_backup_io_ctl +{ + __u8 cbt_uuid[16]; + __u32 direction; + __u32 n_extents; + struct ploop_push_backup_ctl_extent extents[0]; +} __attribute__ ((aligned (8))); + struct ploop_push_backup_stop_ctl { __u8 cbt_uuid[16]; @@ -318,6 +338,9 @@ struct ploop_track_extent /* Start push backup */ #define PLOOP_IOC_PUSH_BACKUP_INIT _IOR(PLOOPCTLTYPE, 29, struct ploop_push_backup_init_ctl) +/* Wait for push backup out-of-order requests; or ACK them */ +#define PLOOP_IOC_PUSH_BACKUP_IO _IOR(PLOOPCTLTYPE, 30, struct ploop_push_backup_io_ctl) + /* Stop push backup */ #define PLOOP_IOC_PUSH_BACKUP_STOP _IOR(PLOOPCTLTYPE, 31, struct ploop_push_backup_stop_ctl) _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel