Maxim Patlasov <mpatla...@virtuozzo.com> writes: > Dima, > > > One week elapsed, still no feedback from you. Do you have something > against this patch? Sorry for delay Max. I was overloaded by pended crap I've collected before vacations, and lost your email. Again sorry.
Whole patch looks good. Thank you for your rede BTW: We defenitely need regression testing for original bug (broken barries and others). I'm working on that. > > > Thanks, > > Maxim > > > On 07/20/2016 11:21 PM, Maxim Patlasov wrote: >> Commit 9f860e606 introduced an engine to delay fsync: doing >> fallocate(FALLOC_FL_CONVERT_UNWRITTEN) dio_post_submit marks >> io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens >> later, when incoming FLUSH|FUA comes. >> >> That was deemed as important because (PSBM-47026): >> >>> This optimization becomes more important due to the fact that customers >>> tend to use pcompact heavily => ploop images grow each day. >> Now, we can easily re-use the engine to delay fsync for reloc >> requests as well. As explained in the description of commit >> 5aa3fe09: >> >>> 1->read_data_from_old_post >>> 2->write_to_new_pos >>> ->sumbit_alloc >>> ->submit_pad >>> ->post_submit->convert_unwritten >>> 3->update_index ->write_page with FLUSH|FUA >>> 4->nullify_old_pos >>> 5->issue_flush >> by the time of step 3 extent coversion is not yet stable because >> belongs to uncommitted transaction. But instead of doing fsync >> inside ->post_submit, we can fsync later, as the very first step >> of write_page for index_update. >> >> Changed in v2: >> - process delayed fsync asynchronously, via PLOOP_E_FSYNC_PENDED eng_state >> >> Changed in v3: >> - use extra arg for ploop_index_wb_proceed_or_delay() instead of ad-hoc >> PLOOP_REQ_FSYNC_IF_DELAYED >> >> https://jira.sw.ru/browse/PSBM-47026 >> >> Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com> >> --- >> drivers/block/ploop/dev.c | 9 +++++++-- >> drivers/block/ploop/map.c | 32 ++++++++++++++++++++++++++++---- >> include/linux/ploop/ploop.h | 1 + >> 3 files changed, 36 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c >> index df3eec9..ed60b1f 100644 >> --- a/drivers/block/ploop/dev.c >> +++ b/drivers/block/ploop/dev.c >> @@ -2720,6 +2720,11 @@ restart: >> ploop_index_wb_complete(preq); >> break; >> >> + case PLOOP_E_FSYNC_PENDED: >> + /* fsync done */ >> + ploop_index_wb_proceed(preq); >> + break; >> + >> default: >> BUG(); >> } >> @@ -4106,7 +4111,7 @@ static void ploop_relocate(struct ploop_device * plo) >> preq->bl.tail = preq->bl.head = NULL; >> preq->req_cluster = 0; >> preq->req_size = 0; >> - preq->req_rw = WRITE_SYNC|REQ_FUA; >> + preq->req_rw = WRITE_SYNC; >> preq->eng_state = PLOOP_E_ENTRY; >> preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A); >> preq->error = 0; >> @@ -4410,7 +4415,7 @@ static void ploop_relocblks_process(struct >> ploop_device *plo) >> preq->bl.tail = preq->bl.head = NULL; >> preq->req_cluster = ~0U; /* uninitialized */ >> preq->req_size = 0; >> - preq->req_rw = WRITE_SYNC|REQ_FUA; >> + preq->req_rw = WRITE_SYNC; >> preq->eng_state = PLOOP_E_ENTRY; >> preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S); >> preq->error = 0; >> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c >> index 5f7fd66..715dc15 100644 >> --- a/drivers/block/ploop/map.c >> +++ b/drivers/block/ploop/map.c >> @@ -915,6 +915,24 @@ void ploop_index_wb_proceed(struct ploop_request * preq) >> put_page(page); >> } >> >> +static void ploop_index_wb_proceed_or_delay(struct ploop_request * preq, >> + int do_fsync_if_delayed) >> +{ >> + if (do_fsync_if_delayed) { >> + struct map_node * m = preq->map; >> + struct ploop_delta * top_delta = map_top_delta(m->parent); >> + struct ploop_io * top_io = &top_delta->io; >> + >> + if (test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state)) { >> + preq->eng_state = PLOOP_E_FSYNC_PENDED; >> + ploop_add_req_to_fsync_queue(preq); >> + return; >> + } >> + } >> + >> + ploop_index_wb_proceed(preq); >> +} >> + >> /* Data write is commited. Now we need to update index. */ >> >> void ploop_index_update(struct ploop_request * preq) >> @@ -927,6 +945,7 @@ void ploop_index_update(struct ploop_request * preq) >> int old_level; >> struct page * page; >> unsigned long state = READ_ONCE(preq->state); >> + int do_fsync_if_delayed = 0; >> >> /* No way back, we are going to initiate index write. */ >> >> @@ -985,10 +1004,12 @@ void ploop_index_update(struct ploop_request * preq) >> preq->req_rw &= ~REQ_FLUSH; >> >> /* Relocate requires consistent index update */ >> - if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) >> + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) { >> preq->req_index_update_rw |= (REQ_FLUSH | REQ_FUA); >> + do_fsync_if_delayed = 1; >> + } >> >> - ploop_index_wb_proceed(preq); >> + ploop_index_wb_proceed_or_delay(preq, do_fsync_if_delayed); >> return; >> >> enomem: >> @@ -1109,6 +1130,7 @@ static void map_wb_complete(struct map_node * m, int >> err) >> int delayed = 0; >> unsigned int idx; >> unsigned long rw; >> + int do_fsync_if_delayed = 0; >> >> /* First, complete processing of written back indices, >> * finally instantiate indices in mapping cache. >> @@ -1206,8 +1228,10 @@ static void map_wb_complete(struct map_node * m, int >> err) >> >> state = READ_ONCE(preq->state); >> /* Relocate requires consistent index update */ >> - if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) >> + if (state & >> (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) { >> rw |= (REQ_FLUSH | REQ_FUA); >> + do_fsync_if_delayed = 1; >> + } >> >> preq->eng_state = PLOOP_E_INDEX_WB; >> get_page(page); >> @@ -1234,7 +1258,7 @@ static void map_wb_complete(struct map_node * m, int >> err) >> plo->st.map_multi_writes++; >> >> main_preq->req_index_update_rw = rw; >> - ploop_index_wb_proceed(main_preq); >> + ploop_index_wb_proceed_or_delay(main_preq, do_fsync_if_delayed); >> } >> >> void >> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h >> index d8e01b6..33733e9 100644 >> --- a/include/linux/ploop/ploop.h >> +++ b/include/linux/ploop/ploop.h >> @@ -513,6 +513,7 @@ enum >> PLOOP_E_ZERO_INDEX, /* Zeroing index of free block; original request >> can use .submit on completion */ >> PLOOP_E_DELTA_ZERO_INDEX,/* the same but for PLOOP_E_DELTA_READ */ >> + PLOOP_E_FSYNC_PENDED, /* INDEX_WB needs io->ops->sync() to proceed */ >> }; >> >> #define BIO_BDEV_REUSED 14 /* io_context is stored in bi_bdev */ >>
signature.asc
Description: PGP signature
_______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel