[Devel] [RH7 PATCH 5/6] ploop: fixup barrier handling during relocation
barrier code is broken in many ways: Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly. But request also can goes though ->dio_submit_alloc()->dio_submit_pad and write_page (for indexes) So in case of grow_dev we have following sequance: E_RELOC_DATA_READ: ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->delta->allocate ->io->submit_allloc: dio_submit_alloc ->dio_submit_pad E_DATA_WBI : data written, time to update index ->delta->allocate_complete:ploop_index_update ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->write_page ->ploop_map_wb_complete ->ploop_wb_complete_post_process ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); E_RELOC_NULLIFY: ->submit() Once we have delayed_flush engine it is easy to implement correct scheme for both engines. E_RELOC_DATA_READ ->submit_allloc => wait->post_submit->issue_flush E_DATA_WBI ->ploop_index_update with FUA E_RELOC_NULLIFY ->submit: => wait->post_submit->issue_flush This makes reloc sequence optimal: RELOC_S: R1, W2,WAIT,FLUSH, WBI:FUA RELOC_A: R1, W2,WAIT,FLUSH, WBI:FUA, W1:NULLIFY,WAIT, FLUSH https://jira.sw.ru/browse/PSBM-47107 Signed-off-by: Dmitry Monakhov --- drivers/block/ploop/dev.c | 2 +- drivers/block/ploop/io_kaio.c | 3 +-- drivers/block/ploop/map.c | 28 ++-- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 95e3067..090cd2d 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -2533,7 +2533,7 @@ restart: sbl.head = sbl.tail = preq->aux_bio; /* Relocated data write required sync before BAT updatee */ - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); + preq->req_rw |= REQ_FUA; if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { preq->eng_state = PLOOP_E_DATA_WBI; diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c index 5341fd5..5217ab4 100644 --- a/drivers/block/ploop/io_kaio.c +++ b/drivers/block/ploop/io_kaio.c @@ -72,8 +72,7 @@ static void kaio_complete_io_state(struct ploop_request * preq) } /* Convert requested fua to fsync */ - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) || - test_and_clear_bit(PLOOP_REQ_DEL_FLUSH, &preq->state) || + if (test_and_clear_bit(PLOOP_REQ_DEL_FLUSH, &preq->state) || test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state)) post_fsync = 1; diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c index 3a6365d..ef351fb 100644 --- a/drivers/block/ploop/map.c +++ b/drivers/block/ploop/map.c @@ -901,6 +901,8 @@ void ploop_index_update(struct ploop_request * preq) int old_level; struct page * page; sector_t sec; + int fua = !!(preq->req_rw & REQ_FUA); + unsigned long state = READ_ONCE(preq->state); /* No way back, we are going to initiate index write. */ @@ -954,12 +956,11 @@ void ploop_index_update(struct ploop_request * preq) plo->st.map_single_writes++; top_delta->ops->map_index(top_delta, m->mn_start, &sec); /* Relocate requires consistent writes, mark such reqs appropriately */ - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) || - test_bit(PLOOP_REQ_RELOC_S, &preq->state)) - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); - - top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, - !!(preq->req_rw & REQ_FUA)); + if (state & (PLOOP_REQ_RELOC_A_FL | PLOOP_REQ_RELOC_S_FL)) { + WARN_ON(state & PLOOP_REQ_DEL_FLUSH_FL); + fua = 1; + } + top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, fua); put_page(page); return; @@ -1063,7 +1064,7 @@ static void map_wb_complete_post_process(struct ploop_map *map, * (see dio_submit()). So fsync of EXT4 image doesnt help us. * We need to force sync of nullified blocks. */ - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); + preq->req_rw |= REQ_FUA; top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw, &sbl, preq->iblock, 1io_queue) { struct ploop_request * preq; + unsigned long state; preq = list_entry(cursor, struct ploop_request, list); + state = READ_ONCE(preq->state); switch (preq->eng_state) { case PLOOP_E_INDEX_DELAY: @@ -1171,9 +1174,10 @@ static void map_wb_complete(struct map_node * m, int err)
Re: [Devel] [RH7 PATCH 5/6] ploop: fixup barrier handling during relocation
No reasons to keep it along with optimization patches. See please the port I'll send soon today. On 06/23/2016 10:25 AM, Dmitry Monakhov wrote: barrier code is broken in many ways: Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly. But request also can goes though ->dio_submit_alloc()->dio_submit_pad and write_page (for indexes) So in case of grow_dev we have following sequance: E_RELOC_DATA_READ: ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->delta->allocate ->io->submit_allloc: dio_submit_alloc ->dio_submit_pad E_DATA_WBI : data written, time to update index ->delta->allocate_complete:ploop_index_update ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->write_page ->ploop_map_wb_complete ->ploop_wb_complete_post_process ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); E_RELOC_NULLIFY: ->submit() Once we have delayed_flush engine it is easy to implement correct scheme for both engines. E_RELOC_DATA_READ ->submit_allloc => wait->post_submit->issue_flush E_DATA_WBI ->ploop_index_update with FUA E_RELOC_NULLIFY ->submit: => wait->post_submit->issue_flush This makes reloc sequence optimal: RELOC_S: R1, W2,WAIT,FLUSH, WBI:FUA RELOC_A: R1, W2,WAIT,FLUSH, WBI:FUA, W1:NULLIFY,WAIT, FLUSH https://jira.sw.ru/browse/PSBM-47107 Signed-off-by: Dmitry Monakhov --- drivers/block/ploop/dev.c | 2 +- drivers/block/ploop/io_kaio.c | 3 +-- drivers/block/ploop/map.c | 28 ++-- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 95e3067..090cd2d 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -2533,7 +2533,7 @@ restart: sbl.head = sbl.tail = preq->aux_bio; /* Relocated data write required sync before BAT updatee */ - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); + preq->req_rw |= REQ_FUA; if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { preq->eng_state = PLOOP_E_DATA_WBI; diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c index 5341fd5..5217ab4 100644 --- a/drivers/block/ploop/io_kaio.c +++ b/drivers/block/ploop/io_kaio.c @@ -72,8 +72,7 @@ static void kaio_complete_io_state(struct ploop_request * preq) } /* Convert requested fua to fsync */ - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) || - test_and_clear_bit(PLOOP_REQ_DEL_FLUSH, &preq->state) || + if (test_and_clear_bit(PLOOP_REQ_DEL_FLUSH, &preq->state) || test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state)) post_fsync = 1; diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c index 3a6365d..ef351fb 100644 --- a/drivers/block/ploop/map.c +++ b/drivers/block/ploop/map.c @@ -901,6 +901,8 @@ void ploop_index_update(struct ploop_request * preq) int old_level; struct page * page; sector_t sec; + int fua = !!(preq->req_rw & REQ_FUA); + unsigned long state = READ_ONCE(preq->state); /* No way back, we are going to initiate index write. */ @@ -954,12 +956,11 @@ void ploop_index_update(struct ploop_request * preq) plo->st.map_single_writes++; top_delta->ops->map_index(top_delta, m->mn_start, &sec); /* Relocate requires consistent writes, mark such reqs appropriately */ - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) || - test_bit(PLOOP_REQ_RELOC_S, &preq->state)) - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); - - top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, - !!(preq->req_rw & REQ_FUA)); + if (state & (PLOOP_REQ_RELOC_A_FL | PLOOP_REQ_RELOC_S_FL)) { + WARN_ON(state & PLOOP_REQ_DEL_FLUSH_FL); + fua = 1; + } + top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, fua); put_page(page); return; @@ -1063,7 +1064,7 @@ static void map_wb_complete_post_process(struct ploop_map *map, * (see dio_submit()). So fsync of EXT4 image doesnt help us. * We need to force sync of nullified blocks. */ - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); + preq->req_rw |= REQ_FUA; top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw, &sbl, preq->iblock, 1io_queue) { struct ploop_request * preq; + unsigned long state; preq = list_entry(cursor, struct ploop_request, list); + state = READ_ONCE(preq->state); switch (preq->eng_state) {