[Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v3

2016-06-21 Thread Dmitry Monakhov
barrier code is broken in many ways:
Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly.
But request also can goes though ->dio_submit_alloc()->dio_submit_pad and 
write_page (for indexes)
So in case of grow_dev we have following sequance:

E_RELOC_DATA_READ:
 ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
  ->delta->allocate
 ->io->submit_allloc: dio_submit_alloc
   ->dio_submit_pad
E_DATA_WBI : data written, time to update index
  ->delta->allocate_complete:ploop_index_update
->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
->write_page
->ploop_map_wb_complete
  ->ploop_wb_complete_post_process
->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
E_RELOC_NULLIFY:

   ->submit()

BUG#2: currecntly kaio write_page silently ignores REQ_FLUSH
BUG#3: io_direct:dio_submit  if fua_delay is not possible we MUST tag all bios 
via REQ_FUA
   not just latest one.
This patch unify barrier handling like follows:
- Get rid of FORCE_{FLUSH,FUA}
- Introduce DELAYED_FLUSH
- fix fua handling for dio_submit
- BUG_ON for REQ_FLUSH in kaio_page_write

This makes reloc sequence optimal:
io_direct
RELOC_S: R1, W2, WBI:FLUSH|FUA
RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA
io_kaio
RELOC_S: R1, W2:FUA, WBI:FUA
RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA

https://jira.sw.ru/browse/PSBM-47107
Signed-off-by: Dmitry Monakhov 
---
 drivers/block/ploop/dev.c   |  8 +---
 drivers/block/ploop/io_direct.c | 30 ++-
 drivers/block/ploop/io_kaio.c   | 23 +
 drivers/block/ploop/map.c   | 45 ++---
 include/linux/ploop/ploop.h | 19 +
 5 files changed, 60 insertions(+), 65 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 96f7850..fbc5f2f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct ploop_request * 
preq)
 
__TRACE("Z %p %u\n", preq, preq->req_cluster);
 
+   if (!preq->error) {
+   WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state));
+   }
while (preq->bl.head) {
struct bio * bio = preq->bl.head;
preq->bl.head = bio->bi_next;
@@ -2530,9 +2533,8 @@ restart:
top_delta = ploop_top_delta(plo);
sbl.head = sbl.tail = preq->aux_bio;
 
-   /* Relocated data write required sync before BAT updatee */
-   set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
-
+   /* Relocated data write required sync before BAT updatee
+* this will happen inside index_update */
if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
preq->eng_state = PLOOP_E_DATA_WBI;
plo->st.bio_out++;
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index a6d83fe..303eb70 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -83,28 +83,19 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq,
int err;
struct bio_list_walk bw;
int preflush;
-   int postfua = 0;
+   int fua = 0;
int write = !!(rw & REQ_WRITE);
int bio_num;
 
trace_submit(preq);
 
preflush = !!(rw & REQ_FLUSH);
-
-   if (test_and_clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state))
-   preflush = 1;
-
-   if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
-   postfua = 1;
-
-   if (!postfua && ploop_req_delay_fua_possible(rw, preq)) {
-
+   fua = !!(rw & REQ_FUA);
+   if (fua && ploop_req_delay_fua_possible(rw, preq)) {
/* Mark req that delayed flush required */
-   set_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state);
-   } else if (rw & REQ_FUA) {
-   postfua = 1;
+   set_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state);
+   fua = 0;
}
-
rw &= ~(REQ_FLUSH | REQ_FUA);
 
 
@@ -238,8 +229,10 @@ flush_bio:
rw2 |= REQ_FLUSH;
preflush = 0;
}
-   if (unlikely(postfua && !bl.head))
-   rw2 |= (REQ_FUA | ((bio_num) ? REQ_FLUSH : 0));
+   /* Very unlikely, but correct.
+* TODO: Optimize postfua via DELAY_FLUSH for any req state */
+   if (unlikely(fua))
+   rw2 |= REQ_FUA;
 
ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw);
submit_bio(rw2, b);
@@ -1520,15 +1513,14 @@ dio_read_page(struct ploop_io * io, struct 
ploop_request * preq,
 
 static void
 dio_write_page(struct ploop_io * io, struct ploop_request * preq,
-  struct page * page, sector_t sec, int fua)
+  st

Re: [Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v3

2016-06-21 Thread Maxim Patlasov

Dima,

After more thinking I realized that the whole idea of 
PLOOP_REQ_DELAYED_FLUSH might be bogus: it is possible that we simply do 
not have many enough incoming FUA-s to make delaying lucrative. This 
patch actually mixes three things: 1) fix barriers for RELOC_A|S 
requests, 2) fix barriers for ordinary requests, 3) DELAYED_FLUSH 
optimization. So, please, split the patch into three and make some 
measurements demonstrating that applying "DELAYED_FLUSH optimization" 
patch on top of previous patches improves performance.


I have an idea about how to fix barriers for ordinary requests -- see 
please the patch I'll send soon. The key point is that handling FLUSH-es 
is broken the same way as FUA: if you observe (rw & REQ_FLUSH) and sends 
first bio marked as REQ_FLUSH, it guarantees nothing unless you wait for 
completion before submitting further bio-s! And ploop simply does not 
have the logic of waiting the first before sending others. And, to make 
things worse, not only dio_submit is affected, dio_sibmit_pad and 
dio_io_page to be fixed too.


There are also some inline comments below...

On 06/21/2016 06:55 AM, Dmitry Monakhov wrote:

barrier code is broken in many ways:
Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly.
But request also can goes though ->dio_submit_alloc()->dio_submit_pad and 
write_page (for indexes)
So in case of grow_dev we have following sequance:

E_RELOC_DATA_READ:
  ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
   ->delta->allocate
  ->io->submit_allloc: dio_submit_alloc
->dio_submit_pad
E_DATA_WBI : data written, time to update index
   ->delta->allocate_complete:ploop_index_update
 ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
 ->write_page
 ->ploop_map_wb_complete
   ->ploop_wb_complete_post_process
 ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
E_RELOC_NULLIFY:

->submit()

BUG#2: currecntly kaio write_page silently ignores REQ_FLUSH
BUG#3: io_direct:dio_submit  if fua_delay is not possible we MUST tag all bios 
via REQ_FUA
not just latest one.
This patch unify barrier handling like follows:
- Get rid of FORCE_{FLUSH,FUA}
- Introduce DELAYED_FLUSH
- fix fua handling for dio_submit
- BUG_ON for REQ_FLUSH in kaio_page_write

This makes reloc sequence optimal:
io_direct
RELOC_S: R1, W2, WBI:FLUSH|FUA
RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA
io_kaio
RELOC_S: R1, W2:FUA, WBI:FUA
RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA

https://jira.sw.ru/browse/PSBM-47107
Signed-off-by: Dmitry Monakhov 
---
  drivers/block/ploop/dev.c   |  8 +---
  drivers/block/ploop/io_direct.c | 30 ++-
  drivers/block/ploop/io_kaio.c   | 23 +
  drivers/block/ploop/map.c   | 45 ++---
  include/linux/ploop/ploop.h | 19 +
  5 files changed, 60 insertions(+), 65 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 96f7850..fbc5f2f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct ploop_request * 
preq)
  
  	__TRACE("Z %p %u\n", preq, preq->req_cluster);
  
+	if (!preq->error) {

+   WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state));
+   }
while (preq->bl.head) {
struct bio * bio = preq->bl.head;
preq->bl.head = bio->bi_next;
@@ -2530,9 +2533,8 @@ restart:
top_delta = ploop_top_delta(plo);
sbl.head = sbl.tail = preq->aux_bio;
  
-		/* Relocated data write required sync before BAT updatee */

-   set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
-
+   /* Relocated data write required sync before BAT updatee
+* this will happen inside index_update */
if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
preq->eng_state = PLOOP_E_DATA_WBI;
plo->st.bio_out++;
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index a6d83fe..303eb70 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -83,28 +83,19 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq,
int err;
struct bio_list_walk bw;
int preflush;
-   int postfua = 0;
+   int fua = 0;
int write = !!(rw & REQ_WRITE);
int bio_num;


Your patch obsoletes bio_num. Please remove it.

  
  	trace_submit(preq);
  
  	preflush = !!(rw & REQ_FLUSH);

-
-   if (test_and_clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state))
-   preflush = 1;
-
-   if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
-   postfua = 1;
-
-   if (!postfua && ploop_req_delay_fua_possible(rw, preq)) {
-
+   fua =

Re: [Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v3

2016-06-22 Thread Maxim Patlasov

Dima,

I'm uneasy that we still have handling RELOC_A|S broken. It seems we 
have full agreement that for such requests we can do unconditional 
FLUSH|FUA when we call write_page from ploop_index_update() and 
map_wb_complete(). And your idea to implement it by passing FLUSH|FUA 
for io_direct and post_fsync=1 for io_kaio is smart and OK. Will you 
send patch for that (fix barriers for RELOC_A|S requests)?


Thanks,
Maxim

On 06/21/2016 04:56 PM, Maxim Patlasov wrote:

Dima,

After more thinking I realized that the whole idea of 
PLOOP_REQ_DELAYED_FLUSH might be bogus: it is possible that we simply 
do not have many enough incoming FUA-s to make delaying lucrative. 
This patch actually mixes three things: 1) fix barriers for RELOC_A|S 
requests, 2) fix barriers for ordinary requests, 3) DELAYED_FLUSH 
optimization. So, please, split the patch into three and make some 
measurements demonstrating that applying "DELAYED_FLUSH optimization" 
patch on top of previous patches improves performance.


I have an idea about how to fix barriers for ordinary requests -- see 
please the patch I'll send soon. The key point is that handling 
FLUSH-es is broken the same way as FUA: if you observe (rw & 
REQ_FLUSH) and sends first bio marked as REQ_FLUSH, it guarantees 
nothing unless you wait for completion before submitting further 
bio-s! And ploop simply does not have the logic of waiting the first 
before sending others. And, to make things worse, not only dio_submit 
is affected, dio_sibmit_pad and dio_io_page to be fixed too.


There are also some inline comments below...

On 06/21/2016 06:55 AM, Dmitry Monakhov wrote:

barrier code is broken in many ways:
Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} 
correctly.
But request also can goes though ->dio_submit_alloc()->dio_submit_pad 
and write_page (for indexes)

So in case of grow_dev we have following sequance:

E_RELOC_DATA_READ:
  ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
   ->delta->allocate
  ->io->submit_allloc: dio_submit_alloc
->dio_submit_pad
E_DATA_WBI : data written, time to update index
->delta->allocate_complete:ploop_index_update
 ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
 ->write_page
 ->ploop_map_wb_complete
   ->ploop_wb_complete_post_process
 ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
E_RELOC_NULLIFY:

->submit()

BUG#2: currecntly kaio write_page silently ignores REQ_FLUSH
BUG#3: io_direct:dio_submit  if fua_delay is not possible we MUST tag 
all bios via REQ_FUA

not just latest one.
This patch unify barrier handling like follows:
- Get rid of FORCE_{FLUSH,FUA}
- Introduce DELAYED_FLUSH
- fix fua handling for dio_submit
- BUG_ON for REQ_FLUSH in kaio_page_write

This makes reloc sequence optimal:
io_direct
RELOC_S: R1, W2, WBI:FLUSH|FUA
RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA
io_kaio
RELOC_S: R1, W2:FUA, WBI:FUA
RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA

https://jira.sw.ru/browse/PSBM-47107
Signed-off-by: Dmitry Monakhov 
---
  drivers/block/ploop/dev.c   |  8 +---
  drivers/block/ploop/io_direct.c | 30 ++-
  drivers/block/ploop/io_kaio.c   | 23 +
  drivers/block/ploop/map.c   | 45 
++---

  include/linux/ploop/ploop.h | 19 +
  5 files changed, 60 insertions(+), 65 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 96f7850..fbc5f2f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct 
ploop_request * preq)

__TRACE("Z %p %u\n", preq, preq->req_cluster);
  +if (!preq->error) {
+WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state));
+}
  while (preq->bl.head) {
  struct bio * bio = preq->bl.head;
  preq->bl.head = bio->bi_next;
@@ -2530,9 +2533,8 @@ restart:
  top_delta = ploop_top_delta(plo);
  sbl.head = sbl.tail = preq->aux_bio;
  -/* Relocated data write required sync before BAT updatee */
-set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
-
+/* Relocated data write required sync before BAT updatee
+ * this will happen inside index_update */
  if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
  preq->eng_state = PLOOP_E_DATA_WBI;
  plo->st.bio_out++;
diff --git a/drivers/block/ploop/io_direct.c 
b/drivers/block/ploop/io_direct.c

index a6d83fe..303eb70 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -83,28 +83,19 @@ dio_submit(struct ploop_io *io, struct 
ploop_request * preq,

  int err;
  struct bio_list_walk bw;
  int preflush;
-int postfua = 0;
+int fua = 0;
  int write = !!(rw & REQ_WRITE);
  int bio_num;


Your patch obsolet