The barrier code is broken in many ways: currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly. But a request can also go through ->dio_submit_alloc()->dio_submit_pad and write_page (for indexes). So in the case of grow_dev we have the following sequence:
E_RELOC_DATA_READ: ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->delta->allocate ->io->submit_alloc: dio_submit_alloc ->dio_submit_pad E_DATA_WBI : data written, time to update index ->delta->allocate_complete:ploop_index_update ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->write_page ->ploop_map_wb_complete ->ploop_wb_complete_post_process ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); E_RELOC_NULLIFY: ->submit() This patch unifies barrier handling as follows: - Add assertion to ploop_complete_request for FORCE_{FLUSH,FUA} state - Perform explicit FUA inside index_update for RELOC requests. This makes the reloc sequence optimal: RELOC_S: R1, W2, WBI:FUA RELOC_A: R1, W2, WBI:FUA, W1:NULLIFY:FUA https://jira.sw.ru/browse/PSBM-47107 Signed-off-by: Dmitry Monakhov <dmonak...@openvz.org> --- drivers/block/ploop/dev.c | 10 +++++++--- drivers/block/ploop/map.c | 29 ++++++++++++----------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 96f7850..998fe71 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -1224,6 +1224,11 @@ static void ploop_complete_request(struct ploop_request * preq) __TRACE("Z %p %u\n", preq, preq->req_cluster); + if (!preq->error) { + unsigned long state = READ_ONCE(preq->state); + WARN_ON(state & (1 << PLOOP_REQ_FORCE_FUA)); + WARN_ON(state & (1 <<PLOOP_REQ_FORCE_FLUSH)); + } while (preq->bl.head) { struct bio * bio = preq->bl.head; preq->bl.head = bio->bi_next; @@ -2530,9 +2535,8 @@ restart: top_delta = ploop_top_delta(plo); sbl.head = sbl.tail = preq->aux_bio; - /* Relocated data write required sync before BAT updatee */ - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); - + /* Relocated data write required sync before BAT updatee + * this will happen inside index_update */ if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { preq->eng_state = PLOOP_E_DATA_WBI; plo->st.bio_out++; diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c index
3a6365d..c17e598 100644 --- a/drivers/block/ploop/map.c +++ b/drivers/block/ploop/map.c @@ -896,6 +896,7 @@ void ploop_index_update(struct ploop_request * preq) struct ploop_device * plo = preq->plo; struct map_node * m = preq->map; struct ploop_delta * top_delta = map_top_delta(m->parent); + int fua = !!(preq->req_rw & REQ_FUA); u32 idx; map_index_t blk; int old_level; @@ -953,13 +954,13 @@ void ploop_index_update(struct ploop_request * preq) __TRACE("wbi %p %u %p\n", preq, preq->req_cluster, m); plo->st.map_single_writes++; top_delta->ops->map_index(top_delta, m->mn_start, &sec); - /* Relocate requires consistent writes, mark such reqs appropriately */ + /* Relocate requires consistent index update */ if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) || test_bit(PLOOP_REQ_RELOC_S, &preq->state)) - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); - - top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, - !!(preq->req_rw & REQ_FUA)); + fua = 1; + if (fua) + clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state); + top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, fua); put_page(page); return; @@ -1078,7 +1079,7 @@ static void map_wb_complete(struct map_node * m, int err) int delayed = 0; unsigned int idx; sector_t sec; - int fua, force_fua; + int fua; /* First, complete processing of written back indices, * finally instantiate indices in mapping cache. 
@@ -1149,7 +1150,6 @@ static void map_wb_complete(struct map_node * m, int err) main_preq = NULL; fua = 0; - force_fua = 0; list_for_each_safe(cursor, tmp, &m->io_queue) { struct ploop_request * preq; @@ -1168,13 +1168,12 @@ static void map_wb_complete(struct map_node * m, int err) break; } - if (preq->req_rw & REQ_FUA) + if (preq->req_rw & REQ_FUA || + test_bit(PLOOP_REQ_RELOC_A, &preq->state) || + test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { + clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state); fua = 1; - - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) || - test_bit(PLOOP_REQ_RELOC_S, &preq->state)) - force_fua = 1; - + } preq->eng_state = PLOOP_E_INDEX_WB; get_page(page); preq->sinfo.wi.tpage = page; @@ -1199,10 +1198,6 @@ static void map_wb_complete(struct map_node * m, int err) __TRACE("wbi2 %p %u %p\n", main_preq, main_preq->req_cluster, m); plo->st.map_multi_writes++; top_delta->ops->map_index(top_delta, m->mn_start, &sec); - - if (force_fua) - set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state); - top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, fua); put_page(page); } -- 1.8.3.1 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel