The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.11
------>
commit ee80c8760dfcb40a3159c7d1ac19b2f73036203d
Author: Maxim Patlasov <mpatla...@virtuozzo.com>
Date:   Fri Jun 3 16:11:05 2016 +0400

    ploop: push_backup: rework lockout machinery
    
    It was not a very nice idea to reuse plo->lockout_tree for push_backup. 
Because
    by design only one preq (for any given req_cluster) can sit in the lockout
    tree, but while we're reusing the tree for a WRITE request, a READ from
backup tool may come. Such a READ may want to use the tree: see how
    map_index_fault calls add_lockout for snapshot configuration.
    
    The patch introduces an ad-hoc separate push_backup lockout tree. This fixes the
    issue (PSBM-47680) and makes the code much easier to understand.
    
    https://jira.sw.ru/browse/PSBM-47680
    
    Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com>
---
 drivers/block/ploop/dev.c    | 111 +++++++++++++++++++++++++++++++++++--------
 drivers/block/ploop/events.h |   1 +
 include/linux/ploop/ploop.h  |   3 ++
 3 files changed, 95 insertions(+), 20 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index d3f0ec0..27827a8 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1117,20 +1117,25 @@ static int ploop_congested(void *data, int bits)
        return ret;
 }
 
-static int check_lockout(struct ploop_request *preq)
+static int __check_lockout(struct ploop_request *preq, bool pb)
 {
        struct ploop_device * plo = preq->plo;
-       struct rb_node * n = plo->lockout_tree.rb_node;
+       struct rb_node * n = pb ? plo->lockout_pb_tree.rb_node :
+                                 plo->lockout_tree.rb_node;
        struct ploop_request * p;
+       int lockout_bit = pb ? PLOOP_REQ_PB_LOCKOUT : PLOOP_REQ_LOCKOUT;
 
        if (n == NULL)
                return 0;
 
-       if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
+       if (test_bit(lockout_bit, &preq->state))
                return 0;
 
        while (n) {
-               p = rb_entry(n, struct ploop_request, lockout_link);
+               if (pb)
+                       p = rb_entry(n, struct ploop_request, lockout_pb_link);
+               else
+                       p = rb_entry(n, struct ploop_request, lockout_link);
 
                if (preq->req_cluster < p->req_cluster)
                        n = n->rb_left;
@@ -1146,19 +1151,51 @@ static int check_lockout(struct ploop_request *preq)
        return 0;
 }
 
-int ploop_add_lockout(struct ploop_request *preq, int try)
+static int check_lockout(struct ploop_request *preq)
+{
+       if (__check_lockout(preq, false))
+               return 1;
+
+       /* push_backup passes READs intact */
+       if (!(preq->req_rw & REQ_WRITE))
+               return 0;
+
+       if (__check_lockout(preq, true))
+               return 1;
+
+       return 0;
+}
+
+static int __ploop_add_lockout(struct ploop_request *preq, int try, bool pb)
 {
        struct ploop_device * plo = preq->plo;
-       struct rb_node ** p = &plo->lockout_tree.rb_node;
+       struct rb_node ** p;
        struct rb_node *parent = NULL;
        struct ploop_request * pr;
+       struct rb_node *link;
+       struct rb_root *tree;
+       int lockout_bit;
+
+       if (pb) {
+               link = &preq->lockout_pb_link;
+               tree = &plo->lockout_pb_tree;
+               lockout_bit = PLOOP_REQ_PB_LOCKOUT;
+       } else {
+               link = &preq->lockout_link;
+               tree = &plo->lockout_tree;
+               lockout_bit = PLOOP_REQ_LOCKOUT;
+       }
 
-       if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
+       if (test_bit(lockout_bit, &preq->state))
                return 0;
 
+       p = &tree->rb_node;
        while (*p) {
                parent = *p;
-               pr = rb_entry(parent, struct ploop_request, lockout_link);
+               if (pb)
+                       pr = rb_entry(parent, struct ploop_request, 
lockout_pb_link);
+               else
+                       pr = rb_entry(parent, struct ploop_request, 
lockout_link);
 
                if (preq->req_cluster == pr->req_cluster) {
                        if (try)
@@ -1174,23 +1211,56 @@ int ploop_add_lockout(struct ploop_request *preq, int 
try)
 
        trace_add_lockout(preq);
 
-       rb_link_node(&preq->lockout_link, parent, p);
-       rb_insert_color(&preq->lockout_link, &plo->lockout_tree);
-       __set_bit(PLOOP_REQ_LOCKOUT, &preq->state);
+       rb_link_node(link, parent, p);
+       rb_insert_color(link, tree);
+       __set_bit(lockout_bit, &preq->state);
        return 0;
 }
+
+int ploop_add_lockout(struct ploop_request *preq, int try)
+{
+       return __ploop_add_lockout(preq, try, false);
+}
 EXPORT_SYMBOL(ploop_add_lockout);
 
-void del_lockout(struct ploop_request *preq)
+static void ploop_add_pb_lockout(struct ploop_request *preq)
+{
+       __ploop_add_lockout(preq, 0, true);
+}
+
+static void __del_lockout(struct ploop_request *preq, bool pb)
 {
        struct ploop_device * plo = preq->plo;
+       struct rb_node *link;
+       struct rb_root *tree;
+       int lockout_bit;
+
+       if (pb) {
+               link = &preq->lockout_pb_link;
+               tree = &plo->lockout_pb_tree;
+               lockout_bit = PLOOP_REQ_PB_LOCKOUT;
+       } else {
+               link = &preq->lockout_link;
+               tree = &plo->lockout_tree;
+               lockout_bit = PLOOP_REQ_LOCKOUT;
+       }
 
-       if (!test_and_clear_bit(PLOOP_REQ_LOCKOUT, &preq->state))
+       if (!test_and_clear_bit(lockout_bit, &preq->state))
                return;
 
        trace_del_lockout(preq);
 
-       rb_erase(&preq->lockout_link, &plo->lockout_tree);
+       rb_erase(link, tree);
+}
+
+void del_lockout(struct ploop_request *preq)
+{
+       __del_lockout(preq, false);
+}
+
+static void del_pb_lockout(struct ploop_request *preq)
+{
+       __del_lockout(preq, true);
 }
 
 static void ploop_discard_wakeup(struct ploop_request *preq, int err)
@@ -1284,6 +1354,7 @@ static void ploop_complete_request(struct ploop_request * 
preq)
        spin_lock_irq(&plo->lock);
 
        del_lockout(preq);
+       del_pb_lockout(preq); /* preq may die via ploop_fail_immediate() */
 
        if (!list_empty(&preq->delay_list))
                list_splice_init(&preq->delay_list, plo->ready_queue.prev);
@@ -2040,23 +2111,22 @@ restart:
        }
 
        /* push_backup special processing */
-       if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
+       if (!test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
            (preq->req_rw & REQ_WRITE) && preq->req_size &&
            ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
                if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
                        /* already reported by userspace push_backup */
                        ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
                } else {
-                       spin_lock_irq(&plo->lock);
-                       ploop_add_lockout(preq, 0);
-                       spin_unlock_irq(&plo->lock);
+                       /* needn't lock because only ploop_thread accesses */
+                       ploop_add_pb_lockout(preq);
                        /*
                         * preq IN: preq is in ppb_pending tree waiting for
                         * out-of-band push_backup processing by userspace ...
                         */
                        return;
                }
-       } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
+       } else if (test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
                   test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
                /*
                 * preq OUT: out-of-band push_backup processing by
@@ -2064,8 +2134,8 @@ restart:
                 */
                ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
 
+               del_pb_lockout(preq);
                spin_lock_irq(&plo->lock);
-               del_lockout(preq);
                if (!list_empty(&preq->delay_list))
                        list_splice_init(&preq->delay_list, 
plo->ready_queue.prev);
                spin_unlock_irq(&plo->lock);
@@ -4894,6 +4964,7 @@ static struct ploop_device *__ploop_dev_alloc(int index)
        INIT_LIST_HEAD(&plo->entry_queue);
        plo->entry_tree[0] = plo->entry_tree[1] = RB_ROOT;
        plo->lockout_tree = RB_ROOT;
+       plo->lockout_pb_tree = RB_ROOT;
        INIT_LIST_HEAD(&plo->ready_queue);
        INIT_LIST_HEAD(&plo->free_list);
        init_waitqueue_head(&plo->waitq);
diff --git a/drivers/block/ploop/events.h b/drivers/block/ploop/events.h
index bc73b72..c22dbde 100644
--- a/drivers/block/ploop/events.h
+++ b/drivers/block/ploop/events.h
@@ -26,6 +26,7 @@
 #define PRINT_PREQ_STATE(state)                                        \
                        __print_flags(state, "|",               \
                        { 1 << PLOOP_REQ_LOCKOUT,       "L"},   \
+                       { 1 << PLOOP_REQ_PB_LOCKOUT,    "BL"},  \
                        { 1 << PLOOP_REQ_SYNC,          "S"},   \
                        { 1 << PLOOP_REQ_BARRIER,       "B"},   \
                        { 1 << PLOOP_REQ_UNSTABLE,      "U"},   \
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index 77fd833..2b63493 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -368,6 +368,7 @@ struct ploop_device
        struct list_head        ready_queue;
 
        struct rb_root          lockout_tree;
+       struct rb_root          lockout_pb_tree;
 
        int                     cluster_log;
        int                     fmt_version;
@@ -453,6 +454,7 @@ struct ploop_device
 enum
 {
        PLOOP_REQ_LOCKOUT,      /* This preq is locking overapping requests */
+       PLOOP_REQ_PB_LOCKOUT,   /* This preq is locking overlapping WRITEs */
        PLOOP_REQ_SYNC,
        PLOOP_REQ_BARRIER,
        PLOOP_REQ_UNSTABLE,
@@ -574,6 +576,7 @@ struct ploop_request
         * until we allocate and initialize block in delta.
         */
        struct rb_node          lockout_link;
+       struct rb_node          lockout_pb_link;
 
        u32                     track_cluster;
 
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to