Maxim Patlasov <mpatla...@virtuozzo.com> writes:

> It was not very nice idea to reuse plo->lockout_tree for push_backup. Because
> by design only one preq (for any given req_cluster) can sit in the lockout
> tree, but while we're reusing the tree for a WRITE request, a READ from
> backup tool may come. Such a READ may want to to use the tree: see how
> map_index_fault calls add_lockout for snapshot configuration.
>
> The patch introduces ad-hoc separate push_backup lockout tree. This fix the
> issue (PSBM-47680) and makes the code much easier to understand.
>
> https://jira.sw.ru/browse/PSBM-47680
ACK
>
> Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c    |  111 
> ++++++++++++++++++++++++++++++++++--------
>  drivers/block/ploop/events.h |    1 
>  include/linux/ploop/ploop.h  |    3 +
>  3 files changed, 95 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index d3f0ec0..27827a8 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -1117,20 +1117,25 @@ static int ploop_congested(void *data, int bits)
>       return ret;
>  }
>  
> -static int check_lockout(struct ploop_request *preq)
> +static int __check_lockout(struct ploop_request *preq, bool pb)
>  {
>       struct ploop_device * plo = preq->plo;
> -     struct rb_node * n = plo->lockout_tree.rb_node;
> +     struct rb_node * n = pb ? plo->lockout_pb_tree.rb_node :
> +                               plo->lockout_tree.rb_node;
>       struct ploop_request * p;
> +     int lockout_bit = pb ? PLOOP_REQ_PB_LOCKOUT : PLOOP_REQ_LOCKOUT;
>  
>       if (n == NULL)
>               return 0;
>  
> -     if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
> +     if (test_bit(lockout_bit, &preq->state))
>               return 0;
>  
>       while (n) {
> -             p = rb_entry(n, struct ploop_request, lockout_link);
> +             if (pb)
> +                     p = rb_entry(n, struct ploop_request, lockout_pb_link);
> +             else
> +                     p = rb_entry(n, struct ploop_request, lockout_link);
>  
>               if (preq->req_cluster < p->req_cluster)
>                       n = n->rb_left;
> @@ -1146,19 +1151,51 @@ static int check_lockout(struct ploop_request *preq)
>       return 0;
>  }
>  
> -int ploop_add_lockout(struct ploop_request *preq, int try)
> +static int check_lockout(struct ploop_request *preq)
> +{
> +     if (__check_lockout(preq, false))
> +             return 1;
> +
> +     /* push_backup passes READs intact */
> +     if (!(preq->req_rw & REQ_WRITE))
> +             return 0;
> +
> +     if (__check_lockout(preq, true))
> +             return 1;
> +
> +     return 0;
> +}
> +
> +static int __ploop_add_lockout(struct ploop_request *preq, int try, bool pb)
>  {
>       struct ploop_device * plo = preq->plo;
> -     struct rb_node ** p = &plo->lockout_tree.rb_node;
> +     struct rb_node ** p;
>       struct rb_node *parent = NULL;
>       struct ploop_request * pr;
> +     struct rb_node *link;
> +     struct rb_root *tree;
> +     int lockout_bit;
> +
> +     if (pb) {
> +             link = &preq->lockout_pb_link;
> +             tree = &plo->lockout_pb_tree;
> +             lockout_bit = PLOOP_REQ_PB_LOCKOUT;
> +     } else {
> +             link = &preq->lockout_link;
> +             tree = &plo->lockout_tree;
> +             lockout_bit = PLOOP_REQ_LOCKOUT;
> +     }
>  
> -     if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
> +     if (test_bit(lockout_bit, &preq->state))
>               return 0;
>  
> +     p = &tree->rb_node;
>       while (*p) {
>               parent = *p;
> -             pr = rb_entry(parent, struct ploop_request, lockout_link);
> +             if (pb)
> +                     pr = rb_entry(parent, struct ploop_request, 
> lockout_pb_link);
> +             else
> +                     pr = rb_entry(parent, struct ploop_request, 
> lockout_link);
>  
>               if (preq->req_cluster == pr->req_cluster) {
>                       if (try)
> @@ -1174,23 +1211,56 @@ int ploop_add_lockout(struct ploop_request *preq, int 
> try)
>  
>       trace_add_lockout(preq);
>  
> -     rb_link_node(&preq->lockout_link, parent, p);
> -     rb_insert_color(&preq->lockout_link, &plo->lockout_tree);
> -     __set_bit(PLOOP_REQ_LOCKOUT, &preq->state);
> +     rb_link_node(link, parent, p);
> +     rb_insert_color(link, tree);
> +     __set_bit(lockout_bit, &preq->state);
>       return 0;
>  }
> +
> +int ploop_add_lockout(struct ploop_request *preq, int try)
> +{
> +     return __ploop_add_lockout(preq, try, false);
> +}
>  EXPORT_SYMBOL(ploop_add_lockout);
>  
> -void del_lockout(struct ploop_request *preq)
> +static void ploop_add_pb_lockout(struct ploop_request *preq)
> +{
> +     __ploop_add_lockout(preq, 0, true);
> +}
> +
> +static void __del_lockout(struct ploop_request *preq, bool pb)
>  {
>       struct ploop_device * plo = preq->plo;
> +     struct rb_node *link;
> +     struct rb_root *tree;
> +     int lockout_bit;
> +
> +     if (pb) {
> +             link = &preq->lockout_pb_link;
> +             tree = &plo->lockout_pb_tree;
> +             lockout_bit = PLOOP_REQ_PB_LOCKOUT;
> +     } else {
> +             link = &preq->lockout_link;
> +             tree = &plo->lockout_tree;
> +             lockout_bit = PLOOP_REQ_LOCKOUT;
> +     }
>  
> -     if (!test_and_clear_bit(PLOOP_REQ_LOCKOUT, &preq->state))
> +     if (!test_and_clear_bit(lockout_bit, &preq->state))
>               return;
>  
>       trace_del_lockout(preq);
>  
> -     rb_erase(&preq->lockout_link, &plo->lockout_tree);
> +     rb_erase(link, tree);
> +}
> +
> +void del_lockout(struct ploop_request *preq)
> +{
> +     __del_lockout(preq, false);
> +}
> +
> +static void del_pb_lockout(struct ploop_request *preq)
> +{
> +     __del_lockout(preq, true);
>  }
>  
>  static void ploop_discard_wakeup(struct ploop_request *preq, int err)
> @@ -1284,6 +1354,7 @@ static void ploop_complete_request(struct ploop_request 
> * preq)
>       spin_lock_irq(&plo->lock);
>  
>       del_lockout(preq);
> +     del_pb_lockout(preq); /* preq may die via ploop_fail_immediate() */
>  
>       if (!list_empty(&preq->delay_list))
>               list_splice_init(&preq->delay_list, plo->ready_queue.prev);
> @@ -2040,23 +2111,22 @@ restart:
>       }
>  
>       /* push_backup special processing */
> -     if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> +     if (!test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
>           (preq->req_rw & REQ_WRITE) && preq->req_size &&
>           ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
>               if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
>                       /* already reported by userspace push_backup */
>                       ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
>               } else {
> -                     spin_lock_irq(&plo->lock);
> -                     ploop_add_lockout(preq, 0);
> -                     spin_unlock_irq(&plo->lock);
> +                     /* needn't lock because only ploop_thread accesses */
> +                     ploop_add_pb_lockout(preq);
>                       /*
>                        * preq IN: preq is in ppb_pending tree waiting for
>                        * out-of-band push_backup processing by userspace ...
>                        */
>                       return;
>               }
> -     } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> +     } else if (test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
>                  test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
>               /*
>                * preq OUT: out-of-band push_backup processing by
> @@ -2064,8 +2134,8 @@ restart:
>                */
>               ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
>  
> +             del_pb_lockout(preq);
>               spin_lock_irq(&plo->lock);
> -             del_lockout(preq);
>               if (!list_empty(&preq->delay_list))
>                       list_splice_init(&preq->delay_list, 
> plo->ready_queue.prev);
>               spin_unlock_irq(&plo->lock);
> @@ -4894,6 +4964,7 @@ static struct ploop_device *__ploop_dev_alloc(int index)
>       INIT_LIST_HEAD(&plo->entry_queue);
>       plo->entry_tree[0] = plo->entry_tree[1] = RB_ROOT;
>       plo->lockout_tree = RB_ROOT;
> +     plo->lockout_pb_tree = RB_ROOT;
>       INIT_LIST_HEAD(&plo->ready_queue);
>       INIT_LIST_HEAD(&plo->free_list);
>       init_waitqueue_head(&plo->waitq);
> diff --git a/drivers/block/ploop/events.h b/drivers/block/ploop/events.h
> index bc73b72..c22dbde 100644
> --- a/drivers/block/ploop/events.h
> +++ b/drivers/block/ploop/events.h
> @@ -26,6 +26,7 @@
>  #define PRINT_PREQ_STATE(state)                                      \
>                       __print_flags(state, "|",               \
>                       { 1 << PLOOP_REQ_LOCKOUT,       "L"},   \
> +                     { 1 << PLOOP_REQ_PB_LOCKOUT,    "BL"},  \
>                       { 1 << PLOOP_REQ_SYNC,          "S"},   \
>                       { 1 << PLOOP_REQ_BARRIER,       "B"},   \
>                       { 1 << PLOOP_REQ_UNSTABLE,      "U"},   \
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index 77fd833..2b63493 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -368,6 +368,7 @@ struct ploop_device
>       struct list_head        ready_queue;
>  
>       struct rb_root          lockout_tree;
> +     struct rb_root          lockout_pb_tree;
>  
>       int                     cluster_log;
>       int                     fmt_version;
> @@ -453,6 +454,7 @@ struct ploop_device
>  enum
>  {
>       PLOOP_REQ_LOCKOUT,      /* This preq is locking overapping requests */
> +     PLOOP_REQ_PB_LOCKOUT,   /* This preq is locking overlapping WRITEs */
>       PLOOP_REQ_SYNC,
>       PLOOP_REQ_BARRIER,
>       PLOOP_REQ_UNSTABLE,
> @@ -574,6 +576,7 @@ struct ploop_request
>        * until we allocate and initialize block in delta.
>        */
>       struct rb_node          lockout_link;
> +     struct rb_node          lockout_pb_link;
>  
>       u32                     track_cluster;
>  

Attachment: signature.asc
Description: PGP signature

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to