Commit 9f860e606 introduced an engine to delay fsync: doing fallocate(FALLOC_FL_CONVERT_UNWRITTEN), dio_post_submit marks the io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens later, when an incoming FLUSH|FUA request comes.
That was deemed important because (PSBM-47026): > This optimization becomes more important due to the fact that customers tend > to use pcompact heavily => ploop images grow each day. Now, we can easily re-use the engine to delay fsync for reloc requests as well. As explained in the description of commit 5aa3fe09: > 1->read_data_from_old_pos > 2->write_to_new_pos > ->submit_alloc > ->submit_pad > ->post_submit->convert_unwritten > 3->update_index ->write_page with FLUSH|FUA > 4->nullify_old_pos > 5->issue_flush By the time of step 3, the extent conversion is not yet stable because it belongs to an uncommitted transaction. But instead of doing fsync inside ->post_submit, we can fsync later, as the very first step of write_page for index_update. https://jira.sw.ru/browse/PSBM-47026 Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com> --- drivers/block/ploop/dev.c | 4 ++-- drivers/block/ploop/io_direct.c | 25 ++++++++++++++++++++++++- drivers/block/ploop/io_kaio.c | 3 ++- drivers/block/ploop/map.c | 17 ++++++++++++----- include/linux/ploop/ploop.h | 3 ++- 5 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index e5f010b..40768b6 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -4097,7 +4097,7 @@ static void ploop_relocate(struct ploop_device * plo) preq->bl.tail = preq->bl.head = NULL; preq->req_cluster = 0; preq->req_size = 0; - preq->req_rw = WRITE_SYNC|REQ_FUA; + preq->req_rw = WRITE_SYNC; preq->eng_state = PLOOP_E_ENTRY; preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A); preq->error = 0; @@ -4401,7 +4401,7 @@ static void ploop_relocblks_process(struct ploop_device *plo) preq->bl.tail = preq->bl.head = NULL; preq->req_cluster = ~0U; /* uninitialized */ preq->req_size = 0; - preq->req_rw = WRITE_SYNC|REQ_FUA; + preq->req_rw = WRITE_SYNC; preq->eng_state = PLOOP_E_ENTRY; preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S); preq->error = 0; diff --git 
a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c index 1086850..0a5fb15 100644 --- a/drivers/block/ploop/io_direct.c +++ b/drivers/block/ploop/io_direct.c @@ -1494,13 +1494,36 @@ dio_read_page(struct ploop_io * io, struct ploop_request * preq, static void dio_write_page(struct ploop_io * io, struct ploop_request * preq, - struct page * page, sector_t sec, unsigned long rw) + struct page * page, sector_t sec, unsigned long rw, + int do_fsync_if_delayed) { if (!(io->files.file->f_mode & FMODE_WRITE)) { PLOOP_FAIL_REQUEST(preq, -EBADF); return; } + if (do_fsync_if_delayed && + test_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state)) { + struct ploop_device * plo = io->plo; + u64 io_count; + int err; + + spin_lock_irq(&plo->lock); + io_count = io->io_count; + spin_unlock_irq(&plo->lock); + + err = io->ops->sync(io); + if (err) { + PLOOP_FAIL_REQUEST(preq, -EBADF); + return; + } + + spin_lock_irq(&plo->lock); + if (io_count == io->io_count && !(io_count & 1)) + clear_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state); + spin_unlock_irq(&plo->lock); + } + dio_io_page(io, rw | WRITE | REQ_SYNC, preq, page, sec); } diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c index 85863df..0d731ef 100644 --- a/drivers/block/ploop/io_kaio.c +++ b/drivers/block/ploop/io_kaio.c @@ -614,7 +614,8 @@ kaio_read_page(struct ploop_io * io, struct ploop_request * preq, static void kaio_write_page(struct ploop_io * io, struct ploop_request * preq, - struct page * page, sector_t sec, unsigned long rw) + struct page * page, sector_t sec, unsigned long rw, + int do_fsync_if_delayed) { ploop_prepare_tracker(preq, sec); diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c index 1883674..96e428b 100644 --- a/drivers/block/ploop/map.c +++ b/drivers/block/ploop/map.c @@ -910,6 +910,7 @@ void ploop_index_update(struct ploop_request * preq) sector_t sec; unsigned long rw; unsigned long state = READ_ONCE(preq->state); + int do_fsync_if_delayed = 0; /* No way back, 
we are going to initiate index write. */ @@ -970,10 +971,13 @@ void ploop_index_update(struct ploop_request * preq) preq->req_rw &= ~REQ_FLUSH; /* Relocate requires consistent index update */ - if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) { rw |= (REQ_FLUSH | REQ_FUA); + do_fsync_if_delayed = 1; + } - top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw); + top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw, + do_fsync_if_delayed); put_page(page); return; @@ -1096,6 +1100,7 @@ static void map_wb_complete(struct map_node * m, int err) unsigned int idx; sector_t sec; unsigned long rw; + int do_fsync_if_delayed = 0; /* First, complete processing of written back indices, * finally instantiate indices in mapping cache. @@ -1193,8 +1198,10 @@ static void map_wb_complete(struct map_node * m, int err) state = READ_ONCE(preq->state); /* Relocate requires consistent index update */ - if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) { rw |= (REQ_FLUSH | REQ_FUA); + do_fsync_if_delayed = 1; + } preq->eng_state = PLOOP_E_INDEX_WB; get_page(page); @@ -1221,8 +1228,8 @@ static void map_wb_complete(struct map_node * m, int err) plo->st.map_multi_writes++; top_delta->ops->map_index(top_delta, m->mn_start, &sec); - top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, - rw); + top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, rw, + do_fsync_if_delayed); put_page(page); } diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h index deee8a7..b03565b 100644 --- a/include/linux/ploop/ploop.h +++ b/include/linux/ploop/ploop.h @@ -164,7 +164,8 @@ struct ploop_io_ops void (*read_page)(struct ploop_io * io, struct ploop_request * preq, struct page * page, sector_t sec); void (*write_page)(struct ploop_io * io, struct ploop_request * preq, - struct page * page, sector_t sec, 
unsigned long rw); + struct page * page, sector_t sec, unsigned long rw, + int do_fsync_if_delayed); int (*sync_read)(struct ploop_io * io, struct page * page, _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel