Re: [Devel] [PATCH rh7 1/4] ploop: introduce pbd
Maxim Patlasov writes: > The patch introduce push_backup descriptor ("pbd") and a few simple > functions to create and release it. > > Userspace can govern it by new ioctls: PLOOP_IOC_PUSH_BACKUP_INIT and > PLOOP_IOC_PUSH_BACKUP_STOP. Acked-by: Dmitry Monakhov > > Signed-off-by: Maxim Patlasov > --- > drivers/block/ploop/Makefile |2 > drivers/block/ploop/dev.c | 89 > drivers/block/ploop/push_backup.c | 271 > + > drivers/block/ploop/push_backup.h |8 + > include/linux/ploop/ploop.h |3 > include/linux/ploop/ploop_if.h| 19 +++ > 6 files changed, 391 insertions(+), 1 deletion(-) > create mode 100644 drivers/block/ploop/push_backup.c > create mode 100644 drivers/block/ploop/push_backup.h > > diff --git a/drivers/block/ploop/Makefile b/drivers/block/ploop/Makefile > index e36a027..0fecf16 100644 > --- a/drivers/block/ploop/Makefile > +++ b/drivers/block/ploop/Makefile > @@ -5,7 +5,7 @@ CFLAGS_io_direct.o = -I$(src) > CFLAGS_ploop_events.o = -I$(src) > > obj-$(CONFIG_BLK_DEV_PLOOP) += ploop.o > -ploop-objs := dev.o map.o io.o sysfs.o tracker.o freeblks.o ploop_events.o > discard.o > +ploop-objs := dev.o map.o io.o sysfs.o tracker.o freeblks.o ploop_events.o > discard.o push_backup.o > > obj-$(CONFIG_BLK_DEV_PLOOP) += pfmt_ploop1.o > pfmt_ploop1-objs := fmt_ploop1.o > diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c > index 1da073c..23da9f5 100644 > --- a/drivers/block/ploop/dev.c > +++ b/drivers/block/ploop/dev.c > @@ -19,6 +19,7 @@ > #include "ploop_events.h" > #include "freeblks.h" > #include "discard.h" > +#include "push_backup.h" > > /* Structures and terms: > * > @@ -3766,6 +3767,9 @@ static int ploop_stop(struct ploop_device * plo, struct > block_device *bdev) > return -EBUSY; > } > > + clear_bit(PLOOP_S_PUSH_BACKUP, &plo->state); > + ploop_pb_stop(plo->pbd); > + > for (p = plo->disk->minors - 1; p > 0; p--) > invalidate_partition(plo->disk, p); > invalidate_partition(plo->disk, 0); > @@ -3892,6 +3896,7 @@ static int ploop_clear(struct ploop_device 
* plo, > struct block_device * bdev) > } > > ploop_fb_fini(plo->fbd, 0); > + ploop_pb_fini(plo->pbd); > > plo->maintenance_type = PLOOP_MNTN_OFF; > plo->bd_size = 0; > @@ -4477,6 +4482,84 @@ static int ploop_getdevice_ioc(unsigned long arg) > return err; > } > > +static int ploop_push_backup_init(struct ploop_device *plo, unsigned long > arg) > +{ > + struct ploop_push_backup_init_ctl ctl; > + struct ploop_pushbackup_desc *pbd = NULL; > + int rc = 0; > + > + if (list_empty(&plo->map.delta_list)) > + return -ENOENT; > + > + if (plo->maintenance_type != PLOOP_MNTN_OFF) > + return -EINVAL; > + > + BUG_ON(plo->pbd); > + > + if (copy_from_user(&ctl, (void*)arg, sizeof(ctl))) > + return -EFAULT; > + > + pbd = ploop_pb_alloc(plo); > + if (!pbd) { > + rc = -ENOMEM; > + goto pb_init_done; > + } > + > + ploop_quiesce(plo); > + > + rc = ploop_pb_init(pbd, ctl.cbt_uuid, !ctl.cbt_mask_addr); > + if (rc) { > + ploop_relax(plo); > + goto pb_init_done; > + } > + > + plo->pbd = pbd; > + > + atomic_set(&plo->maintenance_cnt, 0); > + plo->maintenance_type = PLOOP_MNTN_PUSH_BACKUP; > + set_bit(PLOOP_S_PUSH_BACKUP, &plo->state); > + > + ploop_relax(plo); > + > + if (ctl.cbt_mask_addr) > + rc = ploop_pb_copy_cbt_to_user(pbd, (char *)ctl.cbt_mask_addr); > +pb_init_done: > + if (rc) > + ploop_pb_fini(pbd); > + return rc; > +} > + > +static int ploop_push_backup_stop(struct ploop_device *plo, unsigned long > arg) > +{ > + struct ploop_pushbackup_desc *pbd = plo->pbd; > + struct ploop_push_backup_stop_ctl ctl; > + > + if (plo->maintenance_type != PLOOP_MNTN_PUSH_BACKUP) > + return -EINVAL; > + > + if (copy_from_user(&ctl, (void*)arg, sizeof(ctl))) > + return -EFAULT; > + > + if (pbd && ploop_pb_check_uuid(pbd, ctl.cbt_uuid)) { > + printk("ploop(%d): PUSH_BACKUP_STOP uuid mismatch\n", > +plo->index); > + return -EINVAL; > + } > + > + if (!test_and_clear_bit(PLOOP_S_PUSH_BACKUP, &plo->state)) > + return -EINVAL; > + > + BUG_ON (!pbd); > + ctl.status = ploop_pb_stop(pbd); > + > + 
ploop_quiesce(plo); > + ploop_pb_fini(plo->pbd); > + plo->maintenance_type = PLOOP_MNTN_OFF; > + ploop_relax(plo); > + > + return 0; > +} > + > static int ploop_ioctl(struct block_device *bdev, fmode_t fmode, unsigned > int cmd, > unsigned long arg) > { > @@ -4581,6 +4664,12 @@ static int ploop_ioctl(struct block_device *bdev, > fmode_t fmode, unsigned int cm > case PLOOP_IOC_MAX_DELTA_SIZE: > err = ploop_se
Re: [Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine
Maxim Patlasov writes: I cannot shake the suspicion that this request juggling completely breaks the FS barrier assumptions. For example, the fs does: submit_bio(data_b1) submit_bio(data_b2) submit_bio(commit_b3, FLUSH|FUA) (the journal commit record) wait_for_bio(commit_b3) But there is no guarantee that data_b1 and data_b2 have already completed at that point; they can still be sitting in the pending list. In case of power loss we would have a good commit record which references b1 and b2, but b1 and b2 were never flushed, which results in exposure of uninitialized data. In fact, ext4/jbd2 waits for b1 and b2 first and only after that submits b3, so ext4 will work fine. Otherwise looks good. > When ploop state-machine looks at preq first time, it suspends the preq if > its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially. > To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree. > > Later, when userspace reports that out-of-band processing is done, we > set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and > wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine > know that given preq is OK and we shouldn't suspend further preq-s for > given cluster-block anymore. 
> > Signed-off-by: Maxim Patlasov > --- > drivers/block/ploop/dev.c | 32 +++ > drivers/block/ploop/push_backup.c | 62 > + > drivers/block/ploop/push_backup.h |6 > include/linux/ploop/ploop.h |1 + > 4 files changed, 101 insertions(+) > > diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c > index 2a77d2e..c7cc385 100644 > --- a/drivers/block/ploop/dev.c > +++ b/drivers/block/ploop/dev.c > @@ -2021,6 +2021,38 @@ restart: > return; > } > > + /* push_backup special processing */ > + if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && > + (preq->req_rw & REQ_WRITE) && preq->req_size && > + ploop_pb_check_bit(plo->pbd, preq->req_cluster)) { > + if (ploop_pb_preq_add_pending(plo->pbd, preq)) { > + /* already reported by userspace push_backup */ > + ploop_pb_clear_bit(plo->pbd, preq->req_cluster); > + } else { > + spin_lock_irq(&plo->lock); > + ploop_add_lockout(preq, 0); > + spin_unlock_irq(&plo->lock); > + /* > + * preq IN: preq is in ppb_pending tree waiting for > + * out-of-band push_backup processing by userspace ... 
> + */ > + return; > + } > + } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && > +test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) { > + /* > + * preq OUT: out-of-band push_backup processing by > + * userspace done; preq was re-scheduled > + */ > + ploop_pb_clear_bit(plo->pbd, preq->req_cluster); > + > + spin_lock_irq(&plo->lock); > + del_lockout(preq); > + if (!list_empty(&preq->delay_list)) > + list_splice_init(&preq->delay_list, > plo->ready_queue.prev); > + spin_unlock_irq(&plo->lock); > + } > + > if (plo->trans_map) { > err = ploop_find_trans_map(plo->trans_map, preq); > if (err) { > diff --git a/drivers/block/ploop/push_backup.c > b/drivers/block/ploop/push_backup.c > index 477caf7..488b8fb 100644 > --- a/drivers/block/ploop/push_backup.c > +++ b/drivers/block/ploop/push_backup.c > @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 > map_max, u64 blk) > do_bit_in_map(map, map_max, blk, SET_BIT); > } > > +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk) > +{ > + do_bit_in_map(map, map_max, blk, CLEAR_BIT); > +} > + > +static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk) > +{ > + return do_bit_in_map(map, map_max, blk, CHECK_BIT); > +} > + > +/* intentionally lockless */ > +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu) > +{ > + BUG_ON(!pbd); > + clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu); > +} > + > +/* intentionally lockless */ > +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu) > +{ > + if (!pbd) > + return false; > + > + return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu); > +} > + > static int convert_map_to_map(struct ploop_pushbackup_desc *pbd) > { > struct page **from_map = pbd->cbt_map; > @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct > ploop_request *preq, > rb_insert_color(&preq->reloc_link, tree); > } > > +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd, > + 
struct ploop_request *preq) > +{ > + ploop_pb_add_req_to_tree(preq, &pbd->pending_tree); > +
Re: [Devel] [PATCH rh7 4/4] ploop: push_backup cleanup
Maxim Patlasov writes: > ploop_pb_stop() is called either explicitly, when userspace makes > ioctl(PLOOP_IOC_PUSH_BACKUP_STOP), or implicitly on ploop shutdown > when userspace stops ploop device by ioctl(PLOOP_IOC_STOP). > > In both cases, it's useful to re-schedule all suspended preq-s. Otherwise, > we won't be able to destroy ploop because some preq-s are still not > completed. > Acked-by: Dmitry Monakhov > Signed-off-by: Maxim Patlasov > --- > drivers/block/ploop/push_backup.c | 36 +++- > 1 file changed, 35 insertions(+), 1 deletion(-) > > diff --git a/drivers/block/ploop/push_backup.c > b/drivers/block/ploop/push_backup.c > index 488b8fb..05af67c 100644 > --- a/drivers/block/ploop/push_backup.c > +++ b/drivers/block/ploop/push_backup.c > @@ -358,6 +358,12 @@ ploop_pb_get_first_req_from_pending(struct > ploop_pushbackup_desc *pbd) > } > > static struct ploop_request * > +ploop_pb_get_first_req_from_reported(struct ploop_pushbackup_desc *pbd) > +{ > + return ploop_pb_get_first_req_from_tree(&pbd->reported_tree); > +} > + > +static struct ploop_request * > ploop_pb_get_req_from_pending(struct ploop_pushbackup_desc *pbd, > cluster_t clu) > { > @@ -400,16 +406,44 @@ int ploop_pb_preq_add_pending(struct > ploop_pushbackup_desc *pbd, > > unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd) > { > + unsigned long ret = 0; > + LIST_HEAD(drop_list); > + > if (pbd == NULL) > return 0; > > spin_lock(&pbd->ppb_lock); > > + while (!RB_EMPTY_ROOT(&pbd->pending_tree)) { > + struct ploop_request *preq = > + ploop_pb_get_first_req_from_pending(pbd); > + list_add(&preq->list, &drop_list); > + ret++; > + } > + > + while (!RB_EMPTY_ROOT(&pbd->reported_tree)) { > + struct ploop_request *preq = > + ploop_pb_get_first_req_from_reported(pbd); > + list_add(&preq->list, &drop_list); > + ret++; > + } > + > if (pbd->ppb_waiting) > complete(&pbd->ppb_comp); > spin_unlock(&pbd->ppb_lock); > > - return 0; > + if (!list_empty(&drop_list)) { > + struct ploop_device *plo = pbd->plo; > 
+ > + BUG_ON(!plo); > + spin_lock_irq(&plo->lock); > + list_splice_init(&drop_list, plo->ready_queue.prev); > + if (test_bit(PLOOP_S_WAIT_PROCESS, &plo->state)) > + wake_up_interruptible(&plo->waitq); > + spin_unlock_irq(&plo->lock); > + } > + > + return ret; > } > > int ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd, signature.asc Description: PGP signature ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
Re: [Devel] [PATCH rh7 2/4] ploop: implement PLOOP_IOC_PUSH_BACKUP_IO
Maxim Patlasov writes: > The ioctl(PLOOP_IOC_PUSH_BACKUP_IO) has two mode of operation: > Ack. See minor issue below. > 1) ctl.direction=PLOOP_READ tells userspace which cluster-blocks to > push out-of-band; moves processed preq-s from pending_tree to reported_tree > > 2) ctl.direction=PLOOP_WRITE tells kernel which cluster-blocks were pushed -- > they are either ordinarily processed preq-s or out-of-band ones; the kernel > match the blocks to preq-s in reported_tree and re-schedules them. > > Signed-off-by: Maxim Patlasov > --- > drivers/block/ploop/dev.c | 105 > drivers/block/ploop/push_backup.c | 197 > + > drivers/block/ploop/push_backup.h |5 + > include/linux/ploop/ploop_if.h| 23 > 4 files changed, 330 insertions(+) > > diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c > index 23da9f5..2a77d2e 100644 > --- a/drivers/block/ploop/dev.c > +++ b/drivers/block/ploop/dev.c > @@ -4529,6 +4529,108 @@ pb_init_done: > return rc; > } > > +static int ploop_push_backup_io_read(struct ploop_device *plo, unsigned long > arg, > + struct ploop_push_backup_io_ctl *ctl) > +{ > + struct ploop_push_backup_ctl_extent *e; > + unsigned n_extents = 0; > + int rc = 0; > + > + e = kmalloc(sizeof(*e) * ctl->n_extents, GFP_KERNEL); > + if (!e) > + return -ENOMEM; > + > + while (n_extents < ctl->n_extents) { > + cluster_t clu, len; > + rc = ploop_pb_get_pending(plo->pbd, &clu, &len, n_extents); > + if (rc) > + goto io_read_done; > + > + e[n_extents].clu = clu; > + e[n_extents].len = len; > + n_extents++; > + } > + > + rc = -EFAULT; > + ctl->n_extents = n_extents; > + if (copy_to_user((void*)arg, ctl, sizeof(*ctl))) > + goto io_read_done; > + if (n_extents && > + copy_to_user((void*)(arg + sizeof(*ctl)), e, > + n_extents * sizeof(*e))) > + goto io_read_done; > + rc = 0; > + > +io_read_done: > + kfree(e); > + return rc; > +} > + > +static int ploop_push_backup_io_write(struct ploop_device *plo, unsigned > long arg, > + struct ploop_push_backup_io_ctl *ctl) > +{ > + struct 
ploop_push_backup_ctl_extent *e; > + unsigned i; > + int rc = 0; > + > + e = kmalloc(sizeof(*e) * ctl->n_extents, GFP_KERNEL); > + if (!e) > + return -ENOMEM; > + > + rc = -EFAULT; > + if (copy_from_user(e, (void*)(arg + sizeof(*ctl)), > +ctl->n_extents * sizeof(*e))) > + goto io_write_done; > + > + rc = 0; > + for (i = 0; i < ctl->n_extents; i++) { > + cluster_t j; > + for (j = e[i].clu; j < e[i].clu + e[i].len; j++) > + ploop_pb_put_reported(plo->pbd, j, 1); > +/* OPTIMIZE ME LATER: like this: > + * ploop_pb_put_reported(plo->pbd, e[i].clu, e[i].len); */ > + } > + > +io_write_done: > + kfree(e); > + return rc; > +} > + > +static int ploop_push_backup_io(struct ploop_device *plo, unsigned long arg) > +{ > + struct ploop_push_backup_io_ctl ctl; > + struct ploop_pushbackup_desc *pbd = plo->pbd; > + > + if (list_empty(&plo->map.delta_list)) > + return -ENOENT; > + > + if (plo->maintenance_type != PLOOP_MNTN_PUSH_BACKUP) > + return -EINVAL; > + > + BUG_ON (!pbd); > + > + if (copy_from_user(&ctl, (void*)arg, sizeof(ctl))) > + return -EFAULT; > + > + if (!ctl.n_extents) > + return -EINVAL; > + > + if (ploop_pb_check_uuid(pbd, ctl.cbt_uuid)) { > + printk("ploop(%d): PUSH_BACKUP_IO uuid mismatch\n", > +plo->index); > + return -EINVAL; > + } > + > + switch(ctl.direction) { > + case PLOOP_READ: > + return ploop_push_backup_io_read(plo, arg, &ctl); > + case PLOOP_WRITE: > + return ploop_push_backup_io_write(plo, arg, &ctl); > + } > + > + return -EINVAL; > +} > + > static int ploop_push_backup_stop(struct ploop_device *plo, unsigned long > arg) > { > struct ploop_pushbackup_desc *pbd = plo->pbd; > @@ -4667,6 +4769,9 @@ static int ploop_ioctl(struct block_device *bdev, > fmode_t fmode, unsigned int cm > case PLOOP_IOC_PUSH_BACKUP_INIT: > err = ploop_push_backup_init(plo, arg); > break; > + case PLOOP_IOC_PUSH_BACKUP_IO: > + err = ploop_push_backup_io(plo, arg); > + break; > case PLOOP_IOC_PUSH_BACKUP_STOP: > err = ploop_push_backup_stop(plo, arg); > break; > diff --git 
a/drivers/block/ploop/push_backup.c > b/drivers/block/ploop/push_backup.c > index ecc9862..477caf7 100644 > --- a/drivers/block/ploop/push_backup.c > +++ b/drivers/block/plo