[Devel] [PATCH rh7 3/3] ploop: io_direct: delay f_op->fsync() until index_update for reloc requests (v3)

2016-07-20 Thread Maxim Patlasov
Commit 9f860e606 introduced an engine to delay fsync: doing
fallocate(FALLOC_FL_CONVERT_UNWRITTEN) dio_post_submit marks
io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens
later, when incoming FLUSH|FUA comes.

That was deemed as important because (PSBM-47026):

> This optimization becomes more important due to the fact that customers tend 
> to use pcompact heavily => ploop images grow each day.

Now, we can easily re-use the engine to delay fsync for reloc
requests as well. As explained in the description of commit
5aa3fe09:

> 1->read_data_from_old_post
> 2->write_to_new_pos
>   ->sumbit_alloc
>  ->submit_pad
>  ->post_submit->convert_unwritten
> 3->update_index ->write_page with FLUSH|FUA
> 4->nullify_old_pos
>5->issue_flush

by the time of step 3 extent coversion is not yet stable because
belongs to uncommitted transaction. But instead of doing fsync
inside ->post_submit, we can fsync later, as the very first step
of write_page for index_update.

Changed in v2:
 - process delayed fsync asynchronously, via PLOOP_E_FSYNC_PENDED eng_state

Changed in v3:
 - use extra arg for ploop_index_wb_proceed_or_delay() instead of ad-hoc 
PLOOP_REQ_FSYNC_IF_DELAYED

https://jira.sw.ru/browse/PSBM-47026

Signed-off-by: Maxim Patlasov 
---
 drivers/block/ploop/dev.c   |9 +++--
 drivers/block/ploop/map.c   |   32 
 include/linux/ploop/ploop.h |1 +
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index df3eec9..ed60b1f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2720,6 +2720,11 @@ restart:
ploop_index_wb_complete(preq);
break;
 
+   case PLOOP_E_FSYNC_PENDED:
+   /* fsync done */
+   ploop_index_wb_proceed(preq);
+   break;
+
default:
BUG();
}
@@ -4106,7 +4111,7 @@ static void ploop_relocate(struct ploop_device * plo)
preq->bl.tail = preq->bl.head = NULL;
preq->req_cluster = 0;
preq->req_size = 0;
-   preq->req_rw = WRITE_SYNC|REQ_FUA;
+   preq->req_rw = WRITE_SYNC;
preq->eng_state = PLOOP_E_ENTRY;
preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
preq->error = 0;
@@ -4410,7 +4415,7 @@ static void ploop_relocblks_process(struct ploop_device 
*plo)
preq->bl.tail = preq->bl.head = NULL;
preq->req_cluster = ~0U; /* uninitialized */
preq->req_size = 0;
-   preq->req_rw = WRITE_SYNC|REQ_FUA;
+   preq->req_rw = WRITE_SYNC;
preq->eng_state = PLOOP_E_ENTRY;
preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
preq->error = 0;
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 5f7fd66..715dc15 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -915,6 +915,24 @@ void ploop_index_wb_proceed(struct ploop_request * preq)
put_page(page);
 }
 
+static void ploop_index_wb_proceed_or_delay(struct ploop_request * preq,
+   int do_fsync_if_delayed)
+{
+   if (do_fsync_if_delayed) {
+   struct map_node * m = preq->map;
+   struct ploop_delta * top_delta = map_top_delta(m->parent);
+   struct ploop_io * top_io = &top_delta->io;
+
+   if (test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state)) {
+   preq->eng_state = PLOOP_E_FSYNC_PENDED;
+   ploop_add_req_to_fsync_queue(preq);
+   return;
+   }
+   }
+
+   ploop_index_wb_proceed(preq);
+}
+
 /* Data write is commited. Now we need to update index. */
 
 void ploop_index_update(struct ploop_request * preq)
@@ -927,6 +945,7 @@ void ploop_index_update(struct ploop_request * preq)
int old_level;
struct page * page;
unsigned long state = READ_ONCE(preq->state);
+   int do_fsync_if_delayed = 0;
 
/* No way back, we are going to initiate index write. */
 
@@ -985,10 +1004,12 @@ void ploop_index_update(struct ploop_request * preq)
preq->req_rw &= ~REQ_FLUSH;
 
/* Relocate requires consistent index update */
-   if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+   if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
preq->req_index_update_rw |= (REQ_FLUSH | REQ_FUA);
+   do_fsync_if_delayed = 1;
+   }
 
-   ploop_index_wb_proceed(preq);
+   ploop_index_wb_proceed_or_delay(preq, do_fsync_if_delayed);
return;
 
 enomem:
@@ -1109,6 +1130,7 @@ static void map_wb_complete(struct map_node * m, int err)
int delayed = 0;
unsigned int idx;
unsigned long rw;
+   int do_fsync_if_delayed = 0;
 
/* First, complete processing of written back indices,
   

Re: [Devel] [PATCH rh7 3/3] ploop: io_direct: delay f_op->fsync() until index_update for reloc requests (v3)

2016-07-27 Thread Maxim Patlasov

Dima,


One week elapsed, still no feedback from you. Do you have something 
against this patch?



Thanks,

Maxim


On 07/20/2016 11:21 PM, Maxim Patlasov wrote:

Commit 9f860e606 introduced an engine to delay fsync: doing
fallocate(FALLOC_FL_CONVERT_UNWRITTEN) dio_post_submit marks
io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens
later, when incoming FLUSH|FUA comes.

That was deemed as important because (PSBM-47026):


This optimization becomes more important due to the fact that customers tend to 
use pcompact heavily => ploop images grow each day.

Now, we can easily re-use the engine to delay fsync for reloc
requests as well. As explained in the description of commit
5aa3fe09:


 1->read_data_from_old_post
 2->write_to_new_pos
   ->sumbit_alloc
  ->submit_pad
  ->post_submit->convert_unwritten
 3->update_index ->write_page with FLUSH|FUA
 4->nullify_old_pos
5->issue_flush

by the time of step 3 extent coversion is not yet stable because
belongs to uncommitted transaction. But instead of doing fsync
inside ->post_submit, we can fsync later, as the very first step
of write_page for index_update.

Changed in v2:
  - process delayed fsync asynchronously, via PLOOP_E_FSYNC_PENDED eng_state

Changed in v3:
  - use extra arg for ploop_index_wb_proceed_or_delay() instead of ad-hoc 
PLOOP_REQ_FSYNC_IF_DELAYED

https://jira.sw.ru/browse/PSBM-47026

Signed-off-by: Maxim Patlasov 
---
  drivers/block/ploop/dev.c   |9 +++--
  drivers/block/ploop/map.c   |   32 
  include/linux/ploop/ploop.h |1 +
  3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index df3eec9..ed60b1f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2720,6 +2720,11 @@ restart:
ploop_index_wb_complete(preq);
break;
  
+	case PLOOP_E_FSYNC_PENDED:

+   /* fsync done */
+   ploop_index_wb_proceed(preq);
+   break;
+
default:
BUG();
}
@@ -4106,7 +4111,7 @@ static void ploop_relocate(struct ploop_device * plo)
preq->bl.tail = preq->bl.head = NULL;
preq->req_cluster = 0;
preq->req_size = 0;
-   preq->req_rw = WRITE_SYNC|REQ_FUA;
+   preq->req_rw = WRITE_SYNC;
preq->eng_state = PLOOP_E_ENTRY;
preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
preq->error = 0;
@@ -4410,7 +4415,7 @@ static void ploop_relocblks_process(struct ploop_device 
*plo)
preq->bl.tail = preq->bl.head = NULL;
preq->req_cluster = ~0U; /* uninitialized */
preq->req_size = 0;
-   preq->req_rw = WRITE_SYNC|REQ_FUA;
+   preq->req_rw = WRITE_SYNC;
preq->eng_state = PLOOP_E_ENTRY;
preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
preq->error = 0;
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 5f7fd66..715dc15 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -915,6 +915,24 @@ void ploop_index_wb_proceed(struct ploop_request * preq)
put_page(page);
  }
  
+static void ploop_index_wb_proceed_or_delay(struct ploop_request * preq,

+   int do_fsync_if_delayed)
+{
+   if (do_fsync_if_delayed) {
+   struct map_node * m = preq->map;
+   struct ploop_delta * top_delta = map_top_delta(m->parent);
+   struct ploop_io * top_io = &top_delta->io;
+
+   if (test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state)) {
+   preq->eng_state = PLOOP_E_FSYNC_PENDED;
+   ploop_add_req_to_fsync_queue(preq);
+   return;
+   }
+   }
+
+   ploop_index_wb_proceed(preq);
+}
+
  /* Data write is commited. Now we need to update index. */
  
  void ploop_index_update(struct ploop_request * preq)

@@ -927,6 +945,7 @@ void ploop_index_update(struct ploop_request * preq)
int old_level;
struct page * page;
unsigned long state = READ_ONCE(preq->state);
+   int do_fsync_if_delayed = 0;
  
  	/* No way back, we are going to initiate index write. */
  
@@ -985,10 +1004,12 @@ void ploop_index_update(struct ploop_request * preq)

preq->req_rw &= ~REQ_FLUSH;
  
  	/* Relocate requires consistent index update */

-   if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+   if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
preq->req_index_update_rw |= (REQ_FLUSH | REQ_FUA);
+   do_fsync_if_delayed = 1;
+   }
  
-	ploop_index_wb_proceed(preq);

+   ploop_index_wb_proceed_or_delay(preq, do_fsync_if_delayed);
return;
  
  enomem:

@@ -1109,6 +1130,7 @@ static void map_wb_complete(struct map_node * m, int err)
int delayed = 0;
   

Re: [Devel] [PATCH rh7 3/3] ploop: io_direct: delay f_op->fsync() until index_update for reloc requests (v3)

2016-07-29 Thread Dmitry Monakhov
Maxim Patlasov  writes:

> Dima,
>
>
> One week elapsed, still no feedback from you. Do you have something 
> against this patch?
Sorry for delay Max. I was overloaded by pended crap I've collected
before vacations, and lost your email. Again sorry.

Whole patch looks good. Thank you for your rede

BTW: We defenitely need regression testing for original bug (broken
barries and others). I'm working on that.
>
>
> Thanks,
>
> Maxim
>
>
> On 07/20/2016 11:21 PM, Maxim Patlasov wrote:
>> Commit 9f860e606 introduced an engine to delay fsync: doing
>> fallocate(FALLOC_FL_CONVERT_UNWRITTEN) dio_post_submit marks
>> io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens
>> later, when incoming FLUSH|FUA comes.
>>
>> That was deemed as important because (PSBM-47026):
>>
>>> This optimization becomes more important due to the fact that customers 
>>> tend to use pcompact heavily => ploop images grow each day.
>> Now, we can easily re-use the engine to delay fsync for reloc
>> requests as well. As explained in the description of commit
>> 5aa3fe09:
>>
>>>  1->read_data_from_old_post
>>>  2->write_to_new_pos
>>>->sumbit_alloc
>>>   ->submit_pad
>>>   ->post_submit->convert_unwritten
>>>  3->update_index ->write_page with FLUSH|FUA
>>>  4->nullify_old_pos
>>> 5->issue_flush
>> by the time of step 3 extent coversion is not yet stable because
>> belongs to uncommitted transaction. But instead of doing fsync
>> inside ->post_submit, we can fsync later, as the very first step
>> of write_page for index_update.
>>
>> Changed in v2:
>>   - process delayed fsync asynchronously, via PLOOP_E_FSYNC_PENDED eng_state
>>
>> Changed in v3:
>>   - use extra arg for ploop_index_wb_proceed_or_delay() instead of ad-hoc 
>> PLOOP_REQ_FSYNC_IF_DELAYED
>>
>> https://jira.sw.ru/browse/PSBM-47026
>>
>> Signed-off-by: Maxim Patlasov 
>> ---
>>   drivers/block/ploop/dev.c   |9 +++--
>>   drivers/block/ploop/map.c   |   32 
>>   include/linux/ploop/ploop.h |1 +
>>   3 files changed, 36 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
>> index df3eec9..ed60b1f 100644
>> --- a/drivers/block/ploop/dev.c
>> +++ b/drivers/block/ploop/dev.c
>> @@ -2720,6 +2720,11 @@ restart:
>>  ploop_index_wb_complete(preq);
>>  break;
>>   
>> +case PLOOP_E_FSYNC_PENDED:
>> +/* fsync done */
>> +ploop_index_wb_proceed(preq);
>> +break;
>> +
>>  default:
>>  BUG();
>>  }
>> @@ -4106,7 +4111,7 @@ static void ploop_relocate(struct ploop_device * plo)
>>  preq->bl.tail = preq->bl.head = NULL;
>>  preq->req_cluster = 0;
>>  preq->req_size = 0;
>> -preq->req_rw = WRITE_SYNC|REQ_FUA;
>> +preq->req_rw = WRITE_SYNC;
>>  preq->eng_state = PLOOP_E_ENTRY;
>>  preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
>>  preq->error = 0;
>> @@ -4410,7 +4415,7 @@ static void ploop_relocblks_process(struct 
>> ploop_device *plo)
>>  preq->bl.tail = preq->bl.head = NULL;
>>  preq->req_cluster = ~0U; /* uninitialized */
>>  preq->req_size = 0;
>> -preq->req_rw = WRITE_SYNC|REQ_FUA;
>> +preq->req_rw = WRITE_SYNC;
>>  preq->eng_state = PLOOP_E_ENTRY;
>>  preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
>>  preq->error = 0;
>> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
>> index 5f7fd66..715dc15 100644
>> --- a/drivers/block/ploop/map.c
>> +++ b/drivers/block/ploop/map.c
>> @@ -915,6 +915,24 @@ void ploop_index_wb_proceed(struct ploop_request * preq)
>>  put_page(page);
>>   }
>>   
>> +static void ploop_index_wb_proceed_or_delay(struct ploop_request * preq,
>> +int do_fsync_if_delayed)
>> +{
>> +if (do_fsync_if_delayed) {
>> +struct map_node * m = preq->map;
>> +struct ploop_delta * top_delta = map_top_delta(m->parent);
>> +struct ploop_io * top_io = &top_delta->io;
>> +
>> +if (test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state)) {
>> +preq->eng_state = PLOOP_E_FSYNC_PENDED;
>> +ploop_add_req_to_fsync_queue(preq);
>> +return;
>> +}
>> +}
>> +
>> +ploop_index_wb_proceed(preq);
>> +}
>> +
>>   /* Data write is commited. Now we need to update index. */
>>   
>>   void ploop_index_update(struct ploop_request * preq)
>> @@ -927,6 +945,7 @@ void ploop_index_update(struct ploop_request * preq)
>>  int old_level;
>>  struct page * page;
>>  unsigned long state = READ_ONCE(preq->state);
>> +int do_fsync_if_delayed = 0;
>>   
>>  /* No way back, we are going to initiate index write. */
>>   
>> @@ -985,10 +1004,12 @@ void ploop_index_update(struct ploop_request * preq)
>>  preq->req_rw &= ~REQ_FLUSH;
>>