Commit 9f860e606 introduced an engine to delay fsync: when doing
fallocate(FALLOC_FL_CONVERT_UNWRITTEN), dio_post_submit marks the
io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens later,
when an incoming FLUSH|FUA request arrives.

That was deemed important because (PSBM-47026):

> This optimization becomes more important due to the fact that customers tend 
> to use pcompact heavily => ploop images grow each day.

Now, we can easily re-use the engine to delay fsync for reloc
requests as well. As explained in the description of commit
5aa3fe09:

>     1->read_data_from_old_post
>     2->write_to_new_pos
>       ->sumbit_alloc
>          ->submit_pad
>      ->post_submit->convert_unwritten
>     3->update_index ->write_page with FLUSH|FUA
>     4->nullify_old_pos
>    5->issue_flush

By the time of step 3, the extent conversion is not yet stable because
it belongs to an uncommitted transaction. But instead of doing fsync
inside ->post_submit, we can fsync later, as the very first step
of write_page for index_update.

Changed in v2:
 - process delayed fsync asynchronously, via PLOOP_E_FSYNC_PENDED eng_state

Changed in v3:
 - use an extra arg for ploop_index_wb_proceed_or_delay() instead of the
   ad-hoc PLOOP_REQ_FSYNC_IF_DELAYED

https://jira.sw.ru/browse/PSBM-47026

Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com>
---
 drivers/block/ploop/dev.c   |    9 +++++++--
 drivers/block/ploop/map.c   |   32 ++++++++++++++++++++++++++++----
 include/linux/ploop/ploop.h |    1 +
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index df3eec9..ed60b1f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2720,6 +2720,11 @@ restart:
                ploop_index_wb_complete(preq);
                break;
 
+       case PLOOP_E_FSYNC_PENDED:
+               /* fsync done */
+               ploop_index_wb_proceed(preq);
+               break;
+
        default:
                BUG();
        }
@@ -4106,7 +4111,7 @@ static void ploop_relocate(struct ploop_device * plo)
        preq->bl.tail = preq->bl.head = NULL;
        preq->req_cluster = 0;
        preq->req_size = 0;
-       preq->req_rw = WRITE_SYNC|REQ_FUA;
+       preq->req_rw = WRITE_SYNC;
        preq->eng_state = PLOOP_E_ENTRY;
        preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
        preq->error = 0;
@@ -4410,7 +4415,7 @@ static void ploop_relocblks_process(struct ploop_device *plo)
                preq->bl.tail = preq->bl.head = NULL;
                preq->req_cluster = ~0U; /* uninitialized */
                preq->req_size = 0;
-               preq->req_rw = WRITE_SYNC|REQ_FUA;
+               preq->req_rw = WRITE_SYNC;
                preq->eng_state = PLOOP_E_ENTRY;
                preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
                preq->error = 0;
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 5f7fd66..715dc15 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -915,6 +915,24 @@ void ploop_index_wb_proceed(struct ploop_request * preq)
        put_page(page);
 }
 
+static void ploop_index_wb_proceed_or_delay(struct ploop_request * preq,
+                                           int do_fsync_if_delayed)
+{
+       if (do_fsync_if_delayed) {
+               struct map_node * m = preq->map;
+               struct ploop_delta * top_delta = map_top_delta(m->parent);
+               struct ploop_io * top_io = &top_delta->io;
+
+               if (test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state)) {
+                       preq->eng_state = PLOOP_E_FSYNC_PENDED;
+                       ploop_add_req_to_fsync_queue(preq);
+                       return;
+               }
+       }
+
+       ploop_index_wb_proceed(preq);
+}
+
 /* Data write is commited. Now we need to update index. */
 
 void ploop_index_update(struct ploop_request * preq)
@@ -927,6 +945,7 @@ void ploop_index_update(struct ploop_request * preq)
        int old_level;
        struct page * page;
        unsigned long state = READ_ONCE(preq->state);
+       int do_fsync_if_delayed = 0;
 
        /* No way back, we are going to initiate index write. */
 
@@ -985,10 +1004,12 @@ void ploop_index_update(struct ploop_request * preq)
        preq->req_rw &= ~REQ_FLUSH;
 
        /* Relocate requires consistent index update */
-       if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+       if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
                preq->req_index_update_rw |= (REQ_FLUSH | REQ_FUA);
+               do_fsync_if_delayed = 1;
+       }
 
-       ploop_index_wb_proceed(preq);
+       ploop_index_wb_proceed_or_delay(preq, do_fsync_if_delayed);
        return;
 
 enomem:
@@ -1109,6 +1130,7 @@ static void map_wb_complete(struct map_node * m, int err)
        int delayed = 0;
        unsigned int idx;
        unsigned long rw;
+       int do_fsync_if_delayed = 0;
 
        /* First, complete processing of written back indices,
         * finally instantiate indices in mapping cache.
@@ -1206,8 +1228,10 @@ static void map_wb_complete(struct map_node * m, int err)
 
                        state = READ_ONCE(preq->state);
                        /* Relocate requires consistent index update */
-                       if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+                       if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
                                rw |= (REQ_FLUSH | REQ_FUA);
+                               do_fsync_if_delayed = 1;
+                       }
 
                        preq->eng_state = PLOOP_E_INDEX_WB;
                        get_page(page);
@@ -1234,7 +1258,7 @@ static void map_wb_complete(struct map_node * m, int err)
        plo->st.map_multi_writes++;
 
        main_preq->req_index_update_rw = rw;
-       ploop_index_wb_proceed(main_preq);
+       ploop_index_wb_proceed_or_delay(main_preq, do_fsync_if_delayed);
 }
 
 void
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index d8e01b6..33733e9 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -513,6 +513,7 @@ enum
        PLOOP_E_ZERO_INDEX,     /* Zeroing index of free block; original request
                                   can use .submit on completion */
        PLOOP_E_DELTA_ZERO_INDEX,/* the same but for PLOOP_E_DELTA_READ */
+       PLOOP_E_FSYNC_PENDED,   /* INDEX_WB needs io->ops->sync() to proceed */
 };
 
 #define BIO_BDEV_REUSED        14      /* io_context is stored in bi_bdev */

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to