Maxim Patlasov <mpatla...@virtuozzo.com> writes: > Rebase Dima's patch on top of rh7-3.10.0-327.18.2.vz7.14.19, > but without help of delayed_flush engine: > > To ensure consistency on crash/power outage/hard reboot > events, ploop must implement the following barrier logic > for RELOC_A|S requests: > > 1) After we store data to new place, but before updating > BAT on disk, we have FLUSH everything (in fact, flushing > those data would be enough, but it is simplier to flush > everything). > > 2) We should not proceed handling RELOC_A|S until we > 100% sure new BAT value went to disk platters. So far as > new BAT is only one page, it's OK to mark corresponding > bio with FUA flag for io_direct case. For io_kaio, not > having FUA api, we have to post_fsync BAT update. > > PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA introduced > long time ago probably were intended to ensure the > logic above, but they actually didn't. > > The patch removes PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA, > and implements barriers in a straightforward and simple way: > check for RELOC_A|S explicitly and make FLUSH/FUA where > needed. > > Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com> > --- > drivers/block/ploop/dev.c | 4 ++-- > drivers/block/ploop/io_direct.c | 7 ------- > drivers/block/ploop/io_kaio.c | 8 +++++--- > drivers/block/ploop/map.c | 22 ++++++++++------------ > include/linux/ploop/ploop.h | 1 - > 5 files changed, 17 insertions(+), 25 deletions(-) > > diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c > index 2b60dfa..40768b6 100644 > --- a/drivers/block/ploop/dev.c > +++ b/drivers/block/ploop/dev.c > @@ -2610,8 +2610,8 @@ restart: > top_delta = ploop_top_delta(plo); > sbl.head = sbl.tail = preq->aux_bio; > > - /* Relocated data write required sync before BAT updatee */ > - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); > + /* Relocated data write required sync before BAT update > + * this will happen inside index_update */ > > if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { > preq->eng_state = PLOOP_E_DATA_WBI; > diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c > index c4d0f63..266f041 100644 > --- a/drivers/block/ploop/io_direct.c > +++ b/drivers/block/ploop/io_direct.c > @@ -89,15 +89,11 @@ dio_submit(struct ploop_io *io, struct ploop_request * > preq, > sector_t sec, nsec; > int err; > struct bio_list_walk bw; > - int postfua = 0; > int write = !!(rw & REQ_WRITE); > int delayed_fua = 0; > > trace_submit(preq); > > - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state)) > - postfua = 1; > - > if ((rw & REQ_FUA) && ploop_req_delay_fua_possible(preq)) { > /* Mark req that delayed flush required */ > preq->req_rw |= (REQ_FLUSH | REQ_FUA); > @@ -233,9 +229,6 @@ flush_bio: > b->bi_private = preq; > b->bi_end_io = dio_endio_async; > > - if (unlikely(postfua && !bl.head)) > - rw2 |= REQ_FUA; > - > ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw); > submit_bio(rw2, b); > } > diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c > index ed550f4..85863df 100644 > --- a/drivers/block/ploop/io_kaio.c > +++ b/drivers/block/ploop/io_kaio.c > @@ -69,6 +69,8 @@ static void kaio_complete_io_state(struct ploop_request * > preq) > unsigned long flags; > int post_fsync = 0; > int need_fua = !!(preq->req_rw & REQ_FUA); > + unsigned long state = READ_ONCE(preq->state); > + int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)); > > if (preq->error || !(preq->req_rw & REQ_FUA) || > preq->eng_state == PLOOP_E_INDEX_READ || > @@ -80,9 +82,9 @@ static void kaio_complete_io_state(struct ploop_request * > preq) > } > > /* Convert requested fua to fsync */ > - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) || > - test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) || > - (need_fua && !ploop_req_delay_fua_possible(preq))) { This is the change I dislike the most. io_XXX should not care it is reloc or not. Caller should rule whenether PREFLUSH/POSTFLUSH should happen before preq completes. So IMHO this is a crunch, but correct one.
> + if (test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) || > + (need_fua && !ploop_req_delay_fua_possible(preq)) || > + (reloc && ploop_req_delay_fua_possible(preq))) { > post_fsync = 1; > preq->req_rw &= ~REQ_FUA; > } > diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c > index 915a216..1883674 100644 > --- a/drivers/block/ploop/map.c > +++ b/drivers/block/ploop/map.c > @@ -909,6 +909,7 @@ void ploop_index_update(struct ploop_request * preq) > struct page * page; > sector_t sec; > unsigned long rw; > + unsigned long state = READ_ONCE(preq->state); > > /* No way back, we are going to initiate index write. */ > > @@ -961,10 +962,6 @@ void ploop_index_update(struct ploop_request * preq) > __TRACE("wbi %p %u %p\n", preq, preq->req_cluster, m); > plo->st.map_single_writes++; > top_delta->ops->map_index(top_delta, m->mn_start, &sec); > - /* Relocate requires consistent writes, mark such reqs appropriately */ > - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) || > - test_bit(PLOOP_REQ_RELOC_S, &preq->state)) > - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); > > rw = (preq->req_rw & (REQ_FUA | REQ_FLUSH)); > > @@ -972,6 +969,10 @@ void ploop_index_update(struct ploop_request * preq) > will do the FLUSH */ > preq->req_rw &= ~REQ_FLUSH; > > + /* Relocate requires consistent index update */ > + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) > + rw |= (REQ_FLUSH | REQ_FUA); > + > top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw); > > put_page(page); > @@ -1094,7 +1095,6 @@ static void map_wb_complete(struct map_node * m, int > err) > int delayed = 0; > unsigned int idx; > sector_t sec; > - int force_fua; > unsigned long rw; > > /* First, complete processing of written back indices, > @@ -1166,10 +1166,10 @@ static void map_wb_complete(struct map_node * m, int > err) > > main_preq = NULL; > rw = 0; > - force_fua = 0; > > list_for_each_safe(cursor, tmp, &m->io_queue) { > struct ploop_request * preq; > + unsigned long state; > > preq = list_entry(cursor, struct ploop_request, list); > > @@ -1191,9 +1191,10 @@ static void map_wb_complete(struct map_node * m, int > err) > will do the FLUSH */ > preq->req_rw &= ~REQ_FLUSH; > > - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) || > - test_bit(PLOOP_REQ_RELOC_S, &preq->state)) > - force_fua = 1; > + state = READ_ONCE(preq->state); > + /* Relocate requires consistent index update */ > + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) > + rw |= (REQ_FLUSH | REQ_FUA); > > preq->eng_state = PLOOP_E_INDEX_WB; > get_page(page); > @@ -1220,9 +1221,6 @@ static void map_wb_complete(struct map_node * m, int > err) > plo->st.map_multi_writes++; > top_delta->ops->map_index(top_delta, m->mn_start, &sec); > > - if (force_fua) > - set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state); > - > top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, > rw); > put_page(page); > diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h > index 920daf7..6f41570 100644 > --- a/include/linux/ploop/ploop.h > +++ b/include/linux/ploop/ploop.h > @@ -474,7 +474,6 @@ enum > PLOOP_REQ_ZERO, > PLOOP_REQ_DISCARD, > PLOOP_REQ_RSYNC, > - PLOOP_REQ_FORCE_FUA, /*force fua of req write I/O by engine */ > PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */ > PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */ > PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
signature.asc
Description: PGP signature
_______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel