Re: [Devel] [PATCH rh7 1/4] ploop: introduce pbd

2016-04-30 Thread Dmitry Monakhov
Maxim Patlasov  writes:

> The patch introduces a push_backup descriptor ("pbd") and a few simple
> functions to create and release it.
>
> Userspace can govern it by new ioctls: PLOOP_IOC_PUSH_BACKUP_INIT and
> PLOOP_IOC_PUSH_BACKUP_STOP.
Acked-by: Dmitry Monakhov 
>
> Signed-off-by: Maxim Patlasov 
> ---
>  drivers/block/ploop/Makefile  |2 
>  drivers/block/ploop/dev.c |   89 
>  drivers/block/ploop/push_backup.c |  271 
> +
>  drivers/block/ploop/push_backup.h |8 +
>  include/linux/ploop/ploop.h   |3 
>  include/linux/ploop/ploop_if.h|   19 +++
>  6 files changed, 391 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/block/ploop/push_backup.c
>  create mode 100644 drivers/block/ploop/push_backup.h
>
> diff --git a/drivers/block/ploop/Makefile b/drivers/block/ploop/Makefile
> index e36a027..0fecf16 100644
> --- a/drivers/block/ploop/Makefile
> +++ b/drivers/block/ploop/Makefile
> @@ -5,7 +5,7 @@ CFLAGS_io_direct.o = -I$(src)
>  CFLAGS_ploop_events.o = -I$(src)
>  
>  obj-$(CONFIG_BLK_DEV_PLOOP)  += ploop.o
> -ploop-objs := dev.o map.o io.o sysfs.o tracker.o freeblks.o ploop_events.o 
> discard.o
> +ploop-objs := dev.o map.o io.o sysfs.o tracker.o freeblks.o ploop_events.o 
> discard.o push_backup.o
>  
>  obj-$(CONFIG_BLK_DEV_PLOOP)  += pfmt_ploop1.o
>  pfmt_ploop1-objs := fmt_ploop1.o
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 1da073c..23da9f5 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -19,6 +19,7 @@
>  #include "ploop_events.h"
>  #include "freeblks.h"
>  #include "discard.h"
> +#include "push_backup.h"
>  
>  /* Structures and terms:
>   *
> @@ -3766,6 +3767,9 @@ static int ploop_stop(struct ploop_device * plo, struct 
> block_device *bdev)
>   return -EBUSY;
>   }
>  
> + clear_bit(PLOOP_S_PUSH_BACKUP, &plo->state);
> + ploop_pb_stop(plo->pbd);
> +
>   for (p = plo->disk->minors - 1; p > 0; p--)
>   invalidate_partition(plo->disk, p);
>   invalidate_partition(plo->disk, 0);
> @@ -3892,6 +3896,7 @@ static int ploop_clear(struct ploop_device * plo, 
> struct block_device * bdev)
>   }
>  
>   ploop_fb_fini(plo->fbd, 0);
> + ploop_pb_fini(plo->pbd);
>  
>   plo->maintenance_type = PLOOP_MNTN_OFF;
>   plo->bd_size = 0;
> @@ -4477,6 +4482,84 @@ static int ploop_getdevice_ioc(unsigned long arg)
>   return err;
>  }
>  
> +static int ploop_push_backup_init(struct ploop_device *plo, unsigned long 
> arg)
> +{
> + struct ploop_push_backup_init_ctl ctl;
> + struct ploop_pushbackup_desc *pbd = NULL;
> + int rc = 0;
> +
> + if (list_empty(&plo->map.delta_list))
> + return -ENOENT;
> +
> + if (plo->maintenance_type != PLOOP_MNTN_OFF)
> + return -EINVAL;
> +
> + BUG_ON(plo->pbd);
> +
> + if (copy_from_user(&ctl, (void*)arg, sizeof(ctl)))
> + return -EFAULT;
> +
> + pbd = ploop_pb_alloc(plo);
> + if (!pbd) {
> + rc = -ENOMEM;
> + goto pb_init_done;
> + }
> +
> + ploop_quiesce(plo);
> +
> + rc = ploop_pb_init(pbd, ctl.cbt_uuid, !ctl.cbt_mask_addr);
> + if (rc) {
> + ploop_relax(plo);
> + goto pb_init_done;
> + }
> +
> + plo->pbd = pbd;
> +
> + atomic_set(&plo->maintenance_cnt, 0);
> + plo->maintenance_type = PLOOP_MNTN_PUSH_BACKUP;
> + set_bit(PLOOP_S_PUSH_BACKUP, &plo->state);
> +
> + ploop_relax(plo);
> +
> + if (ctl.cbt_mask_addr)
> + rc = ploop_pb_copy_cbt_to_user(pbd, (char *)ctl.cbt_mask_addr);
> +pb_init_done:
> + if (rc)
> + ploop_pb_fini(pbd);
> + return rc;
> +}
> +
> +static int ploop_push_backup_stop(struct ploop_device *plo, unsigned long 
> arg)
> +{
> + struct ploop_pushbackup_desc *pbd = plo->pbd;
> + struct ploop_push_backup_stop_ctl ctl;
> +
> + if (plo->maintenance_type != PLOOP_MNTN_PUSH_BACKUP)
> + return -EINVAL;
> +
> + if (copy_from_user(&ctl, (void*)arg, sizeof(ctl)))
> + return -EFAULT;
> +
> + if (pbd && ploop_pb_check_uuid(pbd, ctl.cbt_uuid)) {
> + printk("ploop(%d): PUSH_BACKUP_STOP uuid mismatch\n",
> +plo->index);
> + return -EINVAL;
> + }
> +
> + if (!test_and_clear_bit(PLOOP_S_PUSH_BACKUP, &plo->state))
> + return -EINVAL;
> +
> + BUG_ON (!pbd);
> + ctl.status = ploop_pb_stop(pbd);
> +
> + ploop_quiesce(plo);
> + ploop_pb_fini(plo->pbd);
> + plo->maintenance_type = PLOOP_MNTN_OFF;
> + ploop_relax(plo);
> +
> + return 0;
> +}
> +
>  static int ploop_ioctl(struct block_device *bdev, fmode_t fmode, unsigned 
> int cmd,
>  unsigned long arg)
>  {
> @@ -4581,6 +4664,12 @@ static int ploop_ioctl(struct block_device *bdev, 
> fmode_t fmode, unsigned int cm
>   case PLOOP_IOC_MAX_DELTA_SIZE:
>   err = ploop_se

Re: [Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine

2016-04-30 Thread Dmitry Monakhov
Maxim Patlasov  writes:

I cannot shake the feeling that this request juggling completely breaks the
FS barrier assumptions.

For example: fs does
submit_bio(data_b1)
submit_bio(data_b2) 
submit_bio(commit_b3, FLUSH|FUA) journal commit record
wait_for_bio(commit_b3)
But there is no guarantee that data_b1 and data_b2 have already completed.
They can still be in the pending list. In case of power loss we have a good
commit record which references b1 and b2, but b1 and b2 were not flushed,
which results in exposure of uninitialized data.
In fact ext4/jbd2 will wait for b1 and b2 first and only after that will it
submit b3, so ext4 will work fine.

Otherwise looks good.

> When ploop state-machine looks at preq first time, it suspends the preq if
> its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially.
> To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree.
>
> Later, when userspace reports that out-of-band processing is done, we
> set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and
> wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine
> know that given preq is OK and we shouldn't suspend further preq-s for
> given cluster-block anymore.
>
> Signed-off-by: Maxim Patlasov 
> ---
>  drivers/block/ploop/dev.c |   32 +++
>  drivers/block/ploop/push_backup.c |   62 
> +
>  drivers/block/ploop/push_backup.h |6 
>  include/linux/ploop/ploop.h   |1 +
>  4 files changed, 101 insertions(+)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 2a77d2e..c7cc385 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2021,6 +2021,38 @@ restart:
>   return;
>   }
>  
> + /* push_backup special processing */
> + if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> + (preq->req_rw & REQ_WRITE) && preq->req_size &&
> + ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
> + if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
> + /* already reported by userspace push_backup */
> + ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
> + } else {
> + spin_lock_irq(&plo->lock);
> + ploop_add_lockout(preq, 0);
> + spin_unlock_irq(&plo->lock);
> + /*
> +  * preq IN: preq is in ppb_pending tree waiting for
> +  * out-of-band push_backup processing by userspace ...
> +  */
> + return;
> + }
> + } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> +test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
> + /*
> +  * preq OUT: out-of-band push_backup processing by
> +  * userspace done; preq was re-scheduled
> +  */
> + ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
> +
> + spin_lock_irq(&plo->lock);
> + del_lockout(preq);
> + if (!list_empty(&preq->delay_list))
> + list_splice_init(&preq->delay_list, 
> plo->ready_queue.prev);
> + spin_unlock_irq(&plo->lock);
> + }
> +
>   if (plo->trans_map) {
>   err = ploop_find_trans_map(plo->trans_map, preq);
>   if (err) {
> diff --git a/drivers/block/ploop/push_backup.c 
> b/drivers/block/ploop/push_backup.c
> index 477caf7..488b8fb 100644
> --- a/drivers/block/ploop/push_backup.c
> +++ b/drivers/block/ploop/push_backup.c
> @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 
> map_max, u64 blk)
>   do_bit_in_map(map, map_max, blk, SET_BIT);
>  }
>  
> +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk)
> +{
> + do_bit_in_map(map, map_max, blk, CLEAR_BIT);
> +}
> +
> +static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk)
> +{
> + return do_bit_in_map(map, map_max, blk, CHECK_BIT);
> +}
> +
> +/* intentionally lockless */
> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
> +{
> + BUG_ON(!pbd);
> + clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
> +}
> +
> +/* intentionally lockless */
> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
> +{
> + if (!pbd)
> + return false;
> +
> + return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
> +}
> +
>  static int convert_map_to_map(struct ploop_pushbackup_desc *pbd)
>  {
>   struct page **from_map = pbd->cbt_map;
> @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct 
> ploop_request *preq,
>   rb_insert_color(&preq->reloc_link, tree);
>  }
>  
> +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd,
> + struct ploop_request *preq)
> +{
> + ploop_pb_add_req_to_tree(preq, &pbd->pending_tree);
> +

Re: [Devel] [PATCH rh7 4/4] ploop: push_backup cleanup

2016-04-30 Thread Dmitry Monakhov
Maxim Patlasov  writes:

> ploop_pb_stop() is called either explicitly, when userspace makes
> ioctl(PLOOP_IOC_PUSH_BACKUP_STOP), or implicitly on ploop shutdown
> when userspace stops ploop device by ioctl(PLOOP_IOC_STOP).
>
> In both cases, it's useful to re-schedule all suspended preq-s. Otherwise,
> we won't be able to destroy ploop because some preq-s are still not
> completed.
>
Acked-by: Dmitry Monakhov 
> Signed-off-by: Maxim Patlasov 
> ---
>  drivers/block/ploop/push_backup.c |   36 +++-
>  1 file changed, 35 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/block/ploop/push_backup.c 
> b/drivers/block/ploop/push_backup.c
> index 488b8fb..05af67c 100644
> --- a/drivers/block/ploop/push_backup.c
> +++ b/drivers/block/ploop/push_backup.c
> @@ -358,6 +358,12 @@ ploop_pb_get_first_req_from_pending(struct 
> ploop_pushbackup_desc *pbd)
>  }
>  
>  static struct ploop_request *
> +ploop_pb_get_first_req_from_reported(struct ploop_pushbackup_desc *pbd)
> +{
> + return ploop_pb_get_first_req_from_tree(&pbd->reported_tree);
> +}
> +
> +static struct ploop_request *
>  ploop_pb_get_req_from_pending(struct ploop_pushbackup_desc *pbd,
> cluster_t clu)
>  {
> @@ -400,16 +406,44 @@ int ploop_pb_preq_add_pending(struct 
> ploop_pushbackup_desc *pbd,
>  
>  unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd)
>  {
> + unsigned long ret = 0;
> + LIST_HEAD(drop_list);
> +
>   if (pbd == NULL)
>   return 0;
>  
>   spin_lock(&pbd->ppb_lock);
>  
> + while (!RB_EMPTY_ROOT(&pbd->pending_tree)) {
> + struct ploop_request *preq =
> + ploop_pb_get_first_req_from_pending(pbd);
> + list_add(&preq->list, &drop_list);
> + ret++;
> + }
> +
> + while (!RB_EMPTY_ROOT(&pbd->reported_tree)) {
> + struct ploop_request *preq =
> + ploop_pb_get_first_req_from_reported(pbd);
> + list_add(&preq->list, &drop_list);
> + ret++;
> + }
> +
>   if (pbd->ppb_waiting)
>   complete(&pbd->ppb_comp);
>   spin_unlock(&pbd->ppb_lock);
>  
> - return 0;
> + if (!list_empty(&drop_list)) {
> + struct ploop_device *plo = pbd->plo;
> +
> + BUG_ON(!plo);
> + spin_lock_irq(&plo->lock);
> + list_splice_init(&drop_list, plo->ready_queue.prev);
> + if (test_bit(PLOOP_S_WAIT_PROCESS, &plo->state))
> + wake_up_interruptible(&plo->waitq);
> + spin_unlock_irq(&plo->lock);
> + }
> +
> + return ret;
>  }
>  
>  int ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd,


signature.asc
Description: PGP signature
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH rh7 2/4] ploop: implement PLOOP_IOC_PUSH_BACKUP_IO

2016-04-30 Thread Dmitry Monakhov
Maxim Patlasov  writes:

> The ioctl(PLOOP_IOC_PUSH_BACKUP_IO) has two modes of operation:
>
Ack. See minor issue below.
> 1) ctl.direction=PLOOP_READ tells userspace which cluster-blocks to
> push out-of-band; moves processed preq-s from pending_tree to reported_tree
>
> 2) ctl.direction=PLOOP_WRITE tells kernel which cluster-blocks were pushed --
> they are either ordinarily processed preq-s or out-of-band ones; the kernel
> matches the blocks to preq-s in reported_tree and re-schedules them.
>
> Signed-off-by: Maxim Patlasov 
> ---
>  drivers/block/ploop/dev.c |  105 
>  drivers/block/ploop/push_backup.c |  197 
> +
>  drivers/block/ploop/push_backup.h |5 +
>  include/linux/ploop/ploop_if.h|   23 
>  4 files changed, 330 insertions(+)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 23da9f5..2a77d2e 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -4529,6 +4529,108 @@ pb_init_done:
>   return rc;
>  }
>  
> +static int ploop_push_backup_io_read(struct ploop_device *plo, unsigned long 
> arg,
> +  struct ploop_push_backup_io_ctl *ctl)
> +{
> + struct ploop_push_backup_ctl_extent *e;
> + unsigned n_extents = 0;
> + int rc = 0;
> +
> + e = kmalloc(sizeof(*e) * ctl->n_extents, GFP_KERNEL);
> + if (!e)
> + return -ENOMEM;
> +
> + while (n_extents < ctl->n_extents) {
> + cluster_t clu, len;
> + rc = ploop_pb_get_pending(plo->pbd, &clu, &len, n_extents);
> + if (rc)
> + goto io_read_done;
> +
> + e[n_extents].clu = clu;
> + e[n_extents].len = len;
> + n_extents++;
> + }
> +
> + rc = -EFAULT;
> + ctl->n_extents = n_extents;
> + if (copy_to_user((void*)arg, ctl, sizeof(*ctl)))
> + goto io_read_done;
> + if (n_extents &&
> + copy_to_user((void*)(arg + sizeof(*ctl)), e,
> +  n_extents * sizeof(*e)))
> + goto io_read_done;
> + rc = 0;
> +
> +io_read_done:
> + kfree(e);
> + return rc;
> +}
> +
> +static int ploop_push_backup_io_write(struct ploop_device *plo, unsigned 
> long arg,
> +   struct ploop_push_backup_io_ctl *ctl)
> +{
> + struct ploop_push_backup_ctl_extent *e;
> + unsigned i;
> + int rc = 0;
> +
> + e = kmalloc(sizeof(*e) * ctl->n_extents, GFP_KERNEL);
> + if (!e)
> + return -ENOMEM;
> +
> + rc = -EFAULT;
> + if (copy_from_user(e, (void*)(arg + sizeof(*ctl)),
> +ctl->n_extents * sizeof(*e)))
> + goto io_write_done;
> +
> + rc = 0;
> + for (i = 0; i < ctl->n_extents; i++) {
> + cluster_t j;
> + for (j = e[i].clu; j < e[i].clu + e[i].len; j++)
> + ploop_pb_put_reported(plo->pbd, j, 1);
> +/* OPTIMIZE ME LATER: like this:
> +  * ploop_pb_put_reported(plo->pbd, e[i].clu, e[i].len); */
> + }
> +
> +io_write_done:
> + kfree(e);
> + return rc;
> +}
> +
> +static int ploop_push_backup_io(struct ploop_device *plo, unsigned long arg)
> +{
> + struct ploop_push_backup_io_ctl ctl;
> + struct ploop_pushbackup_desc *pbd = plo->pbd;
> +
> + if (list_empty(&plo->map.delta_list))
> + return -ENOENT;
> +
> + if (plo->maintenance_type != PLOOP_MNTN_PUSH_BACKUP)
> + return -EINVAL;
> +
> + BUG_ON (!pbd);
> +
> + if (copy_from_user(&ctl, (void*)arg, sizeof(ctl)))
> + return -EFAULT;
> +
> + if (!ctl.n_extents)
> + return -EINVAL;
> +
> + if (ploop_pb_check_uuid(pbd, ctl.cbt_uuid)) {
> + printk("ploop(%d): PUSH_BACKUP_IO uuid mismatch\n",
> +plo->index);
> + return -EINVAL;
> + }
> +
> + switch(ctl.direction) {
> + case PLOOP_READ:
> + return ploop_push_backup_io_read(plo, arg, &ctl);
> + case PLOOP_WRITE:
> + return ploop_push_backup_io_write(plo, arg, &ctl);
> + }
> +
> + return -EINVAL;
> +}
> +
>  static int ploop_push_backup_stop(struct ploop_device *plo, unsigned long 
> arg)
>  {
>   struct ploop_pushbackup_desc *pbd = plo->pbd;
> @@ -4667,6 +4769,9 @@ static int ploop_ioctl(struct block_device *bdev, 
> fmode_t fmode, unsigned int cm
>   case PLOOP_IOC_PUSH_BACKUP_INIT:
>   err = ploop_push_backup_init(plo, arg);
>   break;
> + case PLOOP_IOC_PUSH_BACKUP_IO:
> + err = ploop_push_backup_io(plo, arg);
> + break;
>   case PLOOP_IOC_PUSH_BACKUP_STOP:
>   err = ploop_push_backup_stop(plo, arg);
>   break;
> diff --git a/drivers/block/ploop/push_backup.c 
> b/drivers/block/ploop/push_backup.c
> index ecc9862..477caf7 100644
> --- a/drivers/block/ploop/push_backup.c
> +++ b/drivers/block/plo