The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-327.18.2.vz7.14.11 ------> commit ee80c8760dfcb40a3159c7d1ac19b2f73036203d Author: Maxim Patlasov <mpatla...@virtuozzo.com> Date: Fri Jun 3 16:11:05 2016 +0400
ploop: push_backup: rework lockout machinery It was not very nice idea to reuse plo->lockout_tree for push_backup. Because by design only one preq (for any given req_cluster) can sit in the lockout tree, but while we're reusing the tree for a WRITE request, a READ from backup tool may come. Such a READ may want to to use the tree: see how map_index_fault calls add_lockout for snapshot configuration. The patch introduces ad-hoc separate push_backup lockout tree. This fix the issue (PSBM-47680) and makes the code much easier to understand. https://jira.sw.ru/browse/PSBM-47680 Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com> --- drivers/block/ploop/dev.c | 111 +++++++++++++++++++++++++++++++++++-------- drivers/block/ploop/events.h | 1 + include/linux/ploop/ploop.h | 3 ++ 3 files changed, 95 insertions(+), 20 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index d3f0ec0..27827a8 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -1117,20 +1117,25 @@ static int ploop_congested(void *data, int bits) return ret; } -static int check_lockout(struct ploop_request *preq) +static int __check_lockout(struct ploop_request *preq, bool pb) { struct ploop_device * plo = preq->plo; - struct rb_node * n = plo->lockout_tree.rb_node; + struct rb_node * n = pb ? plo->lockout_pb_tree.rb_node : + plo->lockout_tree.rb_node; struct ploop_request * p; + int lockout_bit = pb ? PLOOP_REQ_PB_LOCKOUT : PLOOP_REQ_LOCKOUT; if (n == NULL) return 0; - if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state)) + if (test_bit(lockout_bit, &preq->state)) return 0; while (n) { - p = rb_entry(n, struct ploop_request, lockout_link); + if (pb) + p = rb_entry(n, struct ploop_request, lockout_pb_link); + else + p = rb_entry(n, struct ploop_request, lockout_link); if (preq->req_cluster < p->req_cluster) n = n->rb_left; @@ -1146,19 +1151,51 @@ static int check_lockout(struct ploop_request *preq) return 0; } -int ploop_add_lockout(struct ploop_request *preq, int try) +static int check_lockout(struct ploop_request *preq) +{ + if (__check_lockout(preq, false)) + return 1; + + /* push_backup passes READs intact */ + if (!(preq->req_rw & REQ_WRITE)) + return 0; + + if (__check_lockout(preq, true)) + return 1; + + return 0; +} + +static int __ploop_add_lockout(struct ploop_request *preq, int try, bool pb) { struct ploop_device * plo = preq->plo; - struct rb_node ** p = &plo->lockout_tree.rb_node; + struct rb_node ** p; struct rb_node *parent = NULL; struct ploop_request * pr; + struct rb_node *link; + struct rb_root *tree; + int lockout_bit; + + if (pb) { + link = &preq->lockout_pb_link; + tree = &plo->lockout_pb_tree; + lockout_bit = PLOOP_REQ_PB_LOCKOUT; + } else { + link = &preq->lockout_link; + tree = &plo->lockout_tree; + lockout_bit = PLOOP_REQ_LOCKOUT; + } - if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state)) + if (test_bit(lockout_bit, &preq->state)) return 0; + p = &tree->rb_node; while (*p) { parent = *p; - pr = rb_entry(parent, struct ploop_request, lockout_link); + if (pb) + pr = rb_entry(parent, struct ploop_request, lockout_pb_link); + else + pr = rb_entry(parent, struct ploop_request, lockout_link); if (preq->req_cluster == pr->req_cluster) { if (try) @@ -1174,23 +1211,56 @@ int ploop_add_lockout(struct ploop_request *preq, int try) trace_add_lockout(preq); - rb_link_node(&preq->lockout_link, parent, p); - rb_insert_color(&preq->lockout_link, &plo->lockout_tree); - __set_bit(PLOOP_REQ_LOCKOUT, &preq->state); + rb_link_node(link, parent, p); + rb_insert_color(link, tree); + __set_bit(lockout_bit, &preq->state); return 0; } + +int ploop_add_lockout(struct ploop_request *preq, int try) +{ + return __ploop_add_lockout(preq, try, false); +} EXPORT_SYMBOL(ploop_add_lockout); -void del_lockout(struct ploop_request *preq) +static void ploop_add_pb_lockout(struct ploop_request *preq) +{ + __ploop_add_lockout(preq, 0, true); +} + +static void __del_lockout(struct ploop_request *preq, bool pb) { struct ploop_device * plo = preq->plo; + struct rb_node *link; + struct rb_root *tree; + int lockout_bit; + + if (pb) { + link = &preq->lockout_pb_link; + tree = &plo->lockout_pb_tree; + lockout_bit = PLOOP_REQ_PB_LOCKOUT; + } else { + link = &preq->lockout_link; + tree = &plo->lockout_tree; + lockout_bit = PLOOP_REQ_LOCKOUT; + } - if (!test_and_clear_bit(PLOOP_REQ_LOCKOUT, &preq->state)) + if (!test_and_clear_bit(lockout_bit, &preq->state)) return; trace_del_lockout(preq); - rb_erase(&preq->lockout_link, &plo->lockout_tree); + rb_erase(link, tree); +} + +void del_lockout(struct ploop_request *preq) +{ + __del_lockout(preq, false); +} + +static void del_pb_lockout(struct ploop_request *preq) +{ + __del_lockout(preq, true); } static void ploop_discard_wakeup(struct ploop_request *preq, int err) @@ -1284,6 +1354,7 @@ static void ploop_complete_request(struct ploop_request * preq) spin_lock_irq(&plo->lock); del_lockout(preq); + del_pb_lockout(preq); /* preq may die via ploop_fail_immediate() */ if (!list_empty(&preq->delay_list)) list_splice_init(&preq->delay_list, plo->ready_queue.prev); @@ -2040,23 +2111,22 @@ restart: } /* push_backup special processing */ - if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && + if (!test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) && (preq->req_rw & REQ_WRITE) && preq->req_size && ploop_pb_check_bit(plo->pbd, preq->req_cluster)) { if (ploop_pb_preq_add_pending(plo->pbd, preq)) { /* already reported by userspace push_backup */ ploop_pb_clear_bit(plo->pbd, preq->req_cluster); } else { - spin_lock_irq(&plo->lock); - ploop_add_lockout(preq, 0); - spin_unlock_irq(&plo->lock); + /* needn't lock because only ploop_thread accesses */ + ploop_add_pb_lockout(preq); /* * preq IN: preq is in ppb_pending tree waiting for * out-of-band push_backup processing by userspace ... */ return; } - } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && + } else if (test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) && test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) { /* * preq OUT: out-of-band push_backup processing by @@ -2064,8 +2134,8 @@ restart: */ ploop_pb_clear_bit(plo->pbd, preq->req_cluster); + del_pb_lockout(preq); spin_lock_irq(&plo->lock); - del_lockout(preq); if (!list_empty(&preq->delay_list)) list_splice_init(&preq->delay_list, plo->ready_queue.prev); spin_unlock_irq(&plo->lock); @@ -4894,6 +4964,7 @@ static struct ploop_device *__ploop_dev_alloc(int index) INIT_LIST_HEAD(&plo->entry_queue); plo->entry_tree[0] = plo->entry_tree[1] = RB_ROOT; plo->lockout_tree = RB_ROOT; + plo->lockout_pb_tree = RB_ROOT; INIT_LIST_HEAD(&plo->ready_queue); INIT_LIST_HEAD(&plo->free_list); init_waitqueue_head(&plo->waitq); diff --git a/drivers/block/ploop/events.h b/drivers/block/ploop/events.h index bc73b72..c22dbde 100644 --- a/drivers/block/ploop/events.h +++ b/drivers/block/ploop/events.h @@ -26,6 +26,7 @@ #define PRINT_PREQ_STATE(state) \ __print_flags(state, "|", \ { 1 << PLOOP_REQ_LOCKOUT, "L"}, \ + { 1 << PLOOP_REQ_PB_LOCKOUT, "BL"}, \ { 1 << PLOOP_REQ_SYNC, "S"}, \ { 1 << PLOOP_REQ_BARRIER, "B"}, \ { 1 << PLOOP_REQ_UNSTABLE, "U"}, \ diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h index 77fd833..2b63493 100644 --- a/include/linux/ploop/ploop.h +++ b/include/linux/ploop/ploop.h @@ -368,6 +368,7 @@ struct ploop_device struct list_head ready_queue; struct rb_root lockout_tree; + struct rb_root lockout_pb_tree; int cluster_log; int fmt_version; @@ -453,6 +454,7 @@ struct ploop_device enum { PLOOP_REQ_LOCKOUT, /* This preq is locking overapping requests */ + PLOOP_REQ_PB_LOCKOUT, /* This preq is locking overlapping WRITEs */ PLOOP_REQ_SYNC, PLOOP_REQ_BARRIER, PLOOP_REQ_UNSTABLE, @@ -574,6 +576,7 @@ struct ploop_request * until we allocate and initialize block in delta. */ struct rb_node lockout_link; + struct rb_node lockout_pb_link; u32 track_cluster; _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel