Maxim Patlasov <mpatla...@virtuozzo.com> writes:

I cannot shake the concern that this request juggling completely breaks
the FS barrier assumptions.

For example: fs does
submit_bio(data_b1)
submit_bio(data_b2) 
submit_bio(commit_b3, FLUSH|FUA) journal commit record
wait_for_bio(commit_b3)
But there is no guarantee that data_b1 and data_b2 have already completed.
They can still be in the pending list. In case of power loss we have a good
commit record which references b1 and b2, but b1 and b2 were not flushed,
which results in exposure of uninitialized data.
In fact, ext4/jbd2 will wait for b1 and b2 first and only after that will it
submit b3, so ext4 will work fine.

Otherwise looks good.

> When ploop state-machine looks at preq first time, it suspends the preq if
> its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially.
> To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree.
>
> Later, when userspace reports that out-of-band processing is done, we
> set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and
> wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine
> know that given preq is OK and we shouldn't suspend further preq-s for
> given cluster-block anymore.
>
> Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c         |   32 +++++++++++++++++++
>  drivers/block/ploop/push_backup.c |   62 
> +++++++++++++++++++++++++++++++++++++
>  drivers/block/ploop/push_backup.h |    6 ++++
>  include/linux/ploop/ploop.h       |    1 +
>  4 files changed, 101 insertions(+)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 2a77d2e..c7cc385 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2021,6 +2021,38 @@ restart:
>               return;
>       }
>  
> +     /* push_backup special processing */
> +     if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> +         (preq->req_rw & REQ_WRITE) && preq->req_size &&
> +         ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
> +             if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
> +                     /* already reported by userspace push_backup */
> +                     ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
> +             } else {
> +                     spin_lock_irq(&plo->lock);
> +                     ploop_add_lockout(preq, 0);
> +                     spin_unlock_irq(&plo->lock);
> +                     /*
> +                      * preq IN: preq is in ppb_pending tree waiting for
> +                      * out-of-band push_backup processing by userspace ...
> +                      */
> +                     return;
> +             }
> +     } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> +                test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
> +             /*
> +              * preq OUT: out-of-band push_backup processing by
> +              * userspace done; preq was re-scheduled
> +              */
> +             ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
> +
> +             spin_lock_irq(&plo->lock);
> +             del_lockout(preq);
> +             if (!list_empty(&preq->delay_list))
> +                     list_splice_init(&preq->delay_list, 
> plo->ready_queue.prev);
> +             spin_unlock_irq(&plo->lock);
> +     }
> +
>       if (plo->trans_map) {
>               err = ploop_find_trans_map(plo->trans_map, preq);
>               if (err) {
> diff --git a/drivers/block/ploop/push_backup.c 
> b/drivers/block/ploop/push_backup.c
> index 477caf7..488b8fb 100644
> --- a/drivers/block/ploop/push_backup.c
> +++ b/drivers/block/ploop/push_backup.c
> @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 
> map_max, u64 blk)
>       do_bit_in_map(map, map_max, blk, SET_BIT);
>  }
>  
> +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk)
> +{
> +     do_bit_in_map(map, map_max, blk, CLEAR_BIT);
> +}
> +
> +static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk)
> +{
> +     return do_bit_in_map(map, map_max, blk, CHECK_BIT);
> +}
> +
> +/* intentionally lockless */
> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
> +{
> +     BUG_ON(!pbd);
> +     clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
> +}
> +
> +/* intentionally lockless */
> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
> +{
> +     if (!pbd)
> +             return false;
> +
> +     return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
> +}
> +
>  static int convert_map_to_map(struct ploop_pushbackup_desc *pbd)
>  {
>       struct page **from_map = pbd->cbt_map;
> @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct 
> ploop_request *preq,
>       rb_insert_color(&preq->reloc_link, tree);
>  }
>  
> +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd,
> +                                     struct ploop_request *preq)
> +{
> +     ploop_pb_add_req_to_tree(preq, &pbd->pending_tree);
> +}
> +
>  static void ploop_pb_add_req_to_reported(struct ploop_pushbackup_desc *pbd,
>                                        struct ploop_request *preq)
>  {
> @@ -339,6 +371,33 @@ ploop_pb_get_req_from_reported(struct 
> ploop_pushbackup_desc *pbd,
>       return ploop_pb_get_req_from_tree(&pbd->reported_tree, clu);
>  }
>  
> +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
> +                            struct ploop_request *preq)
> +{
> +     BUG_ON(!pbd);
> +
> +     spin_lock(&pbd->ppb_lock);
> +
> +     if (!test_bit(PLOOP_S_PUSH_BACKUP, &pbd->plo->state)) {
> +             spin_unlock(&pbd->ppb_lock);
> +             return -EINTR;
> +     }
> +
> +     /* if (preq matches pbd->reported_map) return -EALREADY; */
> +     if (preq->req_cluster < pbd->ppb_offset) {
> +             spin_unlock(&pbd->ppb_lock);
> +             return -EALREADY;
> +     }
> +
> +     ploop_pb_add_req_to_pending(pbd, preq);
> +
> +     if (pbd->ppb_waiting)
> +             complete(&pbd->ppb_comp);
> +
> +     spin_unlock(&pbd->ppb_lock);
> +     return 0;
> +}
> +
>  unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd)
>  {
>       if (pbd == NULL)
> @@ -428,6 +487,9 @@ void ploop_pb_put_reported(struct ploop_pushbackup_desc 
> *pbd,
>       else
>               n_found++;
>  
> +     if (preq)
> +             __set_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state);
> +
>       /*
>        * If preq not found above, it's unsolicited report. Then it's
>        * enough to have corresponding bit set in reported_map because if
> diff --git a/drivers/block/ploop/push_backup.h 
> b/drivers/block/ploop/push_backup.h
> index 482e070..476ac53 100644
> --- a/drivers/block/ploop/push_backup.h
> +++ b/drivers/block/ploop/push_backup.h
> @@ -11,3 +11,9 @@ int ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd,
>                        cluster_t *clu_p, cluster_t *len_p, unsigned n_done);
>  void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
>                          cluster_t clu, cluster_t len);
> +
> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
> +
> +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
> +                            struct ploop_request *preq);
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index 09f419d3..762d2fd 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -464,6 +464,7 @@ enum
>       PLOOP_REQ_FORCE_FLUSH,  /*force flush by engine */
>       PLOOP_REQ_KAIO_FSYNC,   /*force image fsync by KAIO module */
>       PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
> +     PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
>  };
>  
>  enum

Attachment: signature.asc
Description: PGP signature

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to