Maxim Patlasov <mpatla...@virtuozzo.com> writes:

> Rebase Dima's patch on top of rh7-3.10.0-327.18.2.vz7.14.19,
> but without help of delayed_flush engine:
>
> To ensure consistency on crash/power outage/hard reboot
> events, ploop must implement the following barrier logic
> for RELOC_A|S requests:
>
> 1) After we store data to new place, but before updating
> BAT on disk, we have FLUSH everything (in fact, flushing
> those data would be enough, but it is simplier to flush
> everything).
>
> 2) We should not proceed handling RELOC_A|S until we
> 100% sure new BAT value went to disk platters. So far as
> new BAT is only one page, it's OK to mark corresponding
> bio with FUA flag for io_direct case. For io_kaio, not
> having FUA api, we have to post_fsync BAT update.
>
> PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA introduced
> long time ago probably were intended to ensure the
> logic above, but they actually didn't.
>
> The patch removes PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA,
> and implements barriers in a straightforward and simple way:
> check for RELOC_A|S explicitly and make FLUSH/FUA where
> needed.
>
> Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c       |    4 ++--
>  drivers/block/ploop/io_direct.c |    7 -------
>  drivers/block/ploop/io_kaio.c   |    8 +++++---
>  drivers/block/ploop/map.c       |   22 ++++++++++------------
>  include/linux/ploop/ploop.h     |    1 -
>  5 files changed, 17 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 2b60dfa..40768b6 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2610,8 +2610,8 @@ restart:
>               top_delta = ploop_top_delta(plo);
>               sbl.head = sbl.tail = preq->aux_bio;
>  
> -             /* Relocated data write required sync before BAT updatee */
> -             set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> +             /* Relocated data write required sync before BAT update
> +              * this will happen inside index_update */
>  
>               if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
>                       preq->eng_state = PLOOP_E_DATA_WBI;
> diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
> index c4d0f63..266f041 100644
> --- a/drivers/block/ploop/io_direct.c
> +++ b/drivers/block/ploop/io_direct.c
> @@ -89,15 +89,11 @@ dio_submit(struct ploop_io *io, struct ploop_request * 
> preq,
>       sector_t sec, nsec;
>       int err;
>       struct bio_list_walk bw;
> -     int postfua = 0;
>       int write = !!(rw & REQ_WRITE);
>       int delayed_fua = 0;
>  
>       trace_submit(preq);
>  
> -     if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
> -             postfua = 1;
> -
>       if ((rw & REQ_FUA) && ploop_req_delay_fua_possible(preq)) {
>               /* Mark req that delayed flush required */
>               preq->req_rw |= (REQ_FLUSH | REQ_FUA);
> @@ -233,9 +229,6 @@ flush_bio:
>               b->bi_private = preq;
>               b->bi_end_io = dio_endio_async;
>  
> -             if (unlikely(postfua && !bl.head))
> -                     rw2 |= REQ_FUA;
> -
>               ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw);
>               submit_bio(rw2, b);
>       }
> diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
> index ed550f4..85863df 100644
> --- a/drivers/block/ploop/io_kaio.c
> +++ b/drivers/block/ploop/io_kaio.c
> @@ -69,6 +69,8 @@ static void kaio_complete_io_state(struct ploop_request * 
> preq)
>       unsigned long flags;
>       int post_fsync = 0;
>       int need_fua = !!(preq->req_rw & REQ_FUA);
> +     unsigned long state = READ_ONCE(preq->state);
> +     int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL));
>  
>       if (preq->error || !(preq->req_rw & REQ_FUA) ||
>           preq->eng_state == PLOOP_E_INDEX_READ ||
> @@ -80,9 +82,9 @@ static void kaio_complete_io_state(struct ploop_request * 
> preq)
>       }
>  
>       /* Convert requested fua to fsync */
> -     if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) ||
> -         test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
> -         (need_fua && !ploop_req_delay_fua_possible(preq))) {
This is the change I dislike the most. io_XXX should not care it is
reloc or not. Caller should rule whenether PREFLUSH/POSTFLUSH should
happen before preq completes. So IMHO this is a crunch, but correct one.

> +     if (test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
> +         (need_fua && !ploop_req_delay_fua_possible(preq)) ||
> +         (reloc && ploop_req_delay_fua_possible(preq))) {
>               post_fsync = 1;
>               preq->req_rw &= ~REQ_FUA;
>       }
> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
> index 915a216..1883674 100644
> --- a/drivers/block/ploop/map.c
> +++ b/drivers/block/ploop/map.c
> @@ -909,6 +909,7 @@ void ploop_index_update(struct ploop_request * preq)
>       struct page * page;
>       sector_t sec;
>       unsigned long rw;
> +     unsigned long state = READ_ONCE(preq->state);
>  
>       /* No way back, we are going to initiate index write. */
>  
> @@ -961,10 +962,6 @@ void ploop_index_update(struct ploop_request * preq)
>       __TRACE("wbi %p %u %p\n", preq, preq->req_cluster, m);
>       plo->st.map_single_writes++;
>       top_delta->ops->map_index(top_delta, m->mn_start, &sec);
> -     /* Relocate requires consistent writes, mark such reqs appropriately */
> -     if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> -         test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> -             set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
>  
>       rw = (preq->req_rw & (REQ_FUA | REQ_FLUSH));
>  
> @@ -972,6 +969,10 @@ void ploop_index_update(struct ploop_request * preq)
>          will do the FLUSH */
>       preq->req_rw &= ~REQ_FLUSH;
>  
> +     /* Relocate requires consistent index update */
> +     if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> +             rw |= (REQ_FLUSH | REQ_FUA);
> +
>       top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw);
>  
>       put_page(page);
> @@ -1094,7 +1095,6 @@ static void map_wb_complete(struct map_node * m, int 
> err)
>       int delayed = 0;
>       unsigned int idx;
>       sector_t sec;
> -     int force_fua;
>       unsigned long rw;
>  
>       /* First, complete processing of written back indices,
> @@ -1166,10 +1166,10 @@ static void map_wb_complete(struct map_node * m, int 
> err)
>  
>       main_preq = NULL;
>       rw = 0;
> -     force_fua = 0;
>  
>       list_for_each_safe(cursor, tmp, &m->io_queue) {
>               struct ploop_request * preq;
> +             unsigned long state;
>  
>               preq = list_entry(cursor, struct ploop_request, list);
>  
> @@ -1191,9 +1191,10 @@ static void map_wb_complete(struct map_node * m, int 
> err)
>                          will do the FLUSH */
>                       preq->req_rw &= ~REQ_FLUSH;
>  
> -                     if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> -                         test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> -                             force_fua = 1;
> +                     state = READ_ONCE(preq->state);
> +                     /* Relocate requires consistent index update */
> +                     if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> +                             rw |= (REQ_FLUSH | REQ_FUA);
>  
>                       preq->eng_state = PLOOP_E_INDEX_WB;
>                       get_page(page);
> @@ -1220,9 +1221,6 @@ static void map_wb_complete(struct map_node * m, int 
> err)
>       plo->st.map_multi_writes++;
>       top_delta->ops->map_index(top_delta, m->mn_start, &sec);
>  
> -     if (force_fua)
> -             set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state);
> -
>       top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec,
>                                     rw);
>       put_page(page);
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index 920daf7..6f41570 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -474,7 +474,6 @@ enum
>       PLOOP_REQ_ZERO,
>       PLOOP_REQ_DISCARD,
>       PLOOP_REQ_RSYNC,
> -     PLOOP_REQ_FORCE_FUA,    /*force fua of req write I/O by engine */
>       PLOOP_REQ_KAIO_FSYNC,   /*force image fsync by KAIO module */
>       PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
>       PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */

Attachment: signature.asc
Description: PGP signature

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to