Maxim Patlasov <mpatla...@virtuozzo.com> writes:
> It was not a very nice idea to reuse plo->lockout_tree for push_backup. Because > by design only one preq (for any given req_cluster) can sit in the lockout > tree, but while we're reusing the tree for a WRITE request, a READ from > backup tool may come. Such a READ may want to use the tree: see how > map_index_fault calls add_lockout for snapshot configuration. > > The patch introduces ad-hoc separate push_backup lockout tree. This fixes the > issue (PSBM-47680) and makes the code much easier to understand. > > https://jira.sw.ru/browse/PSBM-47680 ACK > > Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com> > --- > drivers/block/ploop/dev.c | 111 > ++++++++++++++++++++++++++++++++++-------- > drivers/block/ploop/events.h | 1 > include/linux/ploop/ploop.h | 3 + > 3 files changed, 95 insertions(+), 20 deletions(-) > > diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c > index d3f0ec0..27827a8 100644 > --- a/drivers/block/ploop/dev.c > +++ b/drivers/block/ploop/dev.c > @@ -1117,20 +1117,25 @@ static int ploop_congested(void *data, int bits) > return ret; > } > > -static int check_lockout(struct ploop_request *preq) > +static int __check_lockout(struct ploop_request *preq, bool pb) > { > struct ploop_device * plo = preq->plo; > - struct rb_node * n = plo->lockout_tree.rb_node; > + struct rb_node * n = pb ? plo->lockout_pb_tree.rb_node : > + plo->lockout_tree.rb_node; > struct ploop_request * p; > + int lockout_bit = pb ? 
PLOOP_REQ_PB_LOCKOUT : PLOOP_REQ_LOCKOUT; > > if (n == NULL) > return 0; > > - if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state)) > + if (test_bit(lockout_bit, &preq->state)) > return 0; > > while (n) { > - p = rb_entry(n, struct ploop_request, lockout_link); > + if (pb) > + p = rb_entry(n, struct ploop_request, lockout_pb_link); > + else > + p = rb_entry(n, struct ploop_request, lockout_link); > > if (preq->req_cluster < p->req_cluster) > n = n->rb_left; > @@ -1146,19 +1151,51 @@ static int check_lockout(struct ploop_request *preq) > return 0; > } > > -int ploop_add_lockout(struct ploop_request *preq, int try) > +static int check_lockout(struct ploop_request *preq) > +{ > + if (__check_lockout(preq, false)) > + return 1; > + > + /* push_backup passes READs intact */ > + if (!(preq->req_rw & REQ_WRITE)) > + return 0; > + > + if (__check_lockout(preq, true)) > + return 1; > + > + return 0; > +} > + > +static int __ploop_add_lockout(struct ploop_request *preq, int try, bool pb) > { > struct ploop_device * plo = preq->plo; > - struct rb_node ** p = &plo->lockout_tree.rb_node; > + struct rb_node ** p; > struct rb_node *parent = NULL; > struct ploop_request * pr; > + struct rb_node *link; > + struct rb_root *tree; > + int lockout_bit; > + > + if (pb) { > + link = &preq->lockout_pb_link; > + tree = &plo->lockout_pb_tree; > + lockout_bit = PLOOP_REQ_PB_LOCKOUT; > + } else { > + link = &preq->lockout_link; > + tree = &plo->lockout_tree; > + lockout_bit = PLOOP_REQ_LOCKOUT; > + } > > - if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state)) > + if (test_bit(lockout_bit, &preq->state)) > return 0; > > + p = &tree->rb_node; > while (*p) { > parent = *p; > - pr = rb_entry(parent, struct ploop_request, lockout_link); > + if (pb) > + pr = rb_entry(parent, struct ploop_request, > lockout_pb_link); > + else > + pr = rb_entry(parent, struct ploop_request, > lockout_link); > > if (preq->req_cluster == pr->req_cluster) { > if (try) > @@ -1174,23 +1211,56 @@ int ploop_add_lockout(struct 
ploop_request *preq, int > try) > > trace_add_lockout(preq); > > - rb_link_node(&preq->lockout_link, parent, p); > - rb_insert_color(&preq->lockout_link, &plo->lockout_tree); > - __set_bit(PLOOP_REQ_LOCKOUT, &preq->state); > + rb_link_node(link, parent, p); > + rb_insert_color(link, tree); > + __set_bit(lockout_bit, &preq->state); > return 0; > } > + > +int ploop_add_lockout(struct ploop_request *preq, int try) > +{ > + return __ploop_add_lockout(preq, try, false); > +} > EXPORT_SYMBOL(ploop_add_lockout); > > -void del_lockout(struct ploop_request *preq) > +static void ploop_add_pb_lockout(struct ploop_request *preq) > +{ > + __ploop_add_lockout(preq, 0, true); > +} > + > +static void __del_lockout(struct ploop_request *preq, bool pb) > { > struct ploop_device * plo = preq->plo; > + struct rb_node *link; > + struct rb_root *tree; > + int lockout_bit; > + > + if (pb) { > + link = &preq->lockout_pb_link; > + tree = &plo->lockout_pb_tree; > + lockout_bit = PLOOP_REQ_PB_LOCKOUT; > + } else { > + link = &preq->lockout_link; > + tree = &plo->lockout_tree; > + lockout_bit = PLOOP_REQ_LOCKOUT; > + } > > - if (!test_and_clear_bit(PLOOP_REQ_LOCKOUT, &preq->state)) > + if (!test_and_clear_bit(lockout_bit, &preq->state)) > return; > > trace_del_lockout(preq); > > - rb_erase(&preq->lockout_link, &plo->lockout_tree); > + rb_erase(link, tree); > +} > + > +void del_lockout(struct ploop_request *preq) > +{ > + __del_lockout(preq, false); > +} > + > +static void del_pb_lockout(struct ploop_request *preq) > +{ > + __del_lockout(preq, true); > } > > static void ploop_discard_wakeup(struct ploop_request *preq, int err) > @@ -1284,6 +1354,7 @@ static void ploop_complete_request(struct ploop_request > * preq) > spin_lock_irq(&plo->lock); > > del_lockout(preq); > + del_pb_lockout(preq); /* preq may die via ploop_fail_immediate() */ > > if (!list_empty(&preq->delay_list)) > list_splice_init(&preq->delay_list, plo->ready_queue.prev); > @@ -2040,23 +2111,22 @@ restart: > } > > /* push_backup 
special processing */ > - if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && > + if (!test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) && > (preq->req_rw & REQ_WRITE) && preq->req_size && > ploop_pb_check_bit(plo->pbd, preq->req_cluster)) { > if (ploop_pb_preq_add_pending(plo->pbd, preq)) { > /* already reported by userspace push_backup */ > ploop_pb_clear_bit(plo->pbd, preq->req_cluster); > } else { > - spin_lock_irq(&plo->lock); > - ploop_add_lockout(preq, 0); > - spin_unlock_irq(&plo->lock); > + /* needn't lock because only ploop_thread accesses */ > + ploop_add_pb_lockout(preq); > /* > * preq IN: preq is in ppb_pending tree waiting for > * out-of-band push_backup processing by userspace ... > */ > return; > } > - } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && > + } else if (test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) && > test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) { > /* > * preq OUT: out-of-band push_backup processing by > @@ -2064,8 +2134,8 @@ restart: > */ > ploop_pb_clear_bit(plo->pbd, preq->req_cluster); > > + del_pb_lockout(preq); > spin_lock_irq(&plo->lock); > - del_lockout(preq); > if (!list_empty(&preq->delay_list)) > list_splice_init(&preq->delay_list, > plo->ready_queue.prev); > spin_unlock_irq(&plo->lock); > @@ -4894,6 +4964,7 @@ static struct ploop_device *__ploop_dev_alloc(int index) > INIT_LIST_HEAD(&plo->entry_queue); > plo->entry_tree[0] = plo->entry_tree[1] = RB_ROOT; > plo->lockout_tree = RB_ROOT; > + plo->lockout_pb_tree = RB_ROOT; > INIT_LIST_HEAD(&plo->ready_queue); > INIT_LIST_HEAD(&plo->free_list); > init_waitqueue_head(&plo->waitq); > diff --git a/drivers/block/ploop/events.h b/drivers/block/ploop/events.h > index bc73b72..c22dbde 100644 > --- a/drivers/block/ploop/events.h > +++ b/drivers/block/ploop/events.h > @@ -26,6 +26,7 @@ > #define PRINT_PREQ_STATE(state) \ > __print_flags(state, "|", \ > { 1 << PLOOP_REQ_LOCKOUT, "L"}, \ > + { 1 << PLOOP_REQ_PB_LOCKOUT, "BL"}, \ > { 1 << PLOOP_REQ_SYNC, "S"}, \ > { 1 << 
PLOOP_REQ_BARRIER, "B"}, \ > { 1 << PLOOP_REQ_UNSTABLE, "U"}, \ > diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h > index 77fd833..2b63493 100644 > --- a/include/linux/ploop/ploop.h > +++ b/include/linux/ploop/ploop.h > @@ -368,6 +368,7 @@ struct ploop_device > struct list_head ready_queue; > > struct rb_root lockout_tree; > + struct rb_root lockout_pb_tree; > > int cluster_log; > int fmt_version; > @@ -453,6 +454,7 @@ struct ploop_device > enum > { > PLOOP_REQ_LOCKOUT, /* This preq is locking overapping requests */ > + PLOOP_REQ_PB_LOCKOUT, /* This preq is locking overlapping WRITEs */ > PLOOP_REQ_SYNC, > PLOOP_REQ_BARRIER, > PLOOP_REQ_UNSTABLE, > @@ -574,6 +576,7 @@ struct ploop_request > * until we allocate and initialize block in delta. > */ > struct rb_node lockout_link; > + struct rb_node lockout_pb_link; > > u32 track_cluster; >
signature.asc
Description: PGP signature
_______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel