Re: [Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine

2016-05-09 Thread Maxim Patlasov

On 04/30/2016 10:00 AM, Dmitry Monakhov wrote:

> Maxim Patlasov  writes:
>
> I cannot shake the suspicion that this request juggling completely
> breaks the FS barrier assumptions.
>
> For example, the fs does:
>
> submit_bio(data_b1)
> submit_bio(data_b2)
> submit_bio(commit_b3, FLUSH|FUA)  /* journal commit record */
> wait_for_bio(commit_b3)
>
> But there is no guarantee that data_b1 and data_b2 have already
> completed; they can still be in the pending list. In case of power
> loss we get a good commit record referencing b1 and b2, but b1 and
> b2 were never flushed, which results in exposing uninitialized data.
> In fact ext4/jbd2 waits for b1 and b2 first and only then submits b3,
> so ext4 works fine.


Any code assuming that completion of FLUSH|FUA guarantees something 
about already-submitted-but-not-yet-completed bio-s is broken.
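
To illustrate the point, here is a minimal sketch (not jbd2 code:
fs_commit(), wait_all_data_bios() and wait_for_bio() are made-up
helpers) of the ordering the filesystem itself must enforce:

/*
 * Sketch only: the fs must wait for its data bios before issuing
 * the FLUSH|FUA commit record, because the flush only covers bios
 * that have already completed.
 */
static void fs_commit(struct bio *b1, struct bio *b2, struct bio *b3)
{
	submit_bio(WRITE, b1);			/* data */
	submit_bio(WRITE, b2);			/* data */

	wait_all_data_bios();			/* the crucial step */

	submit_bio(WRITE_FLUSH_FUA, b3);	/* journal commit record */
	wait_for_bio(b3);
}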




> Otherwise looks good.


Re: [Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine

2016-04-30 Thread Dmitry Monakhov
Maxim Patlasov  writes:

I cannot shake the suspicion that this request juggling completely
breaks the FS barrier assumptions.

For example, the fs does:

submit_bio(data_b1)
submit_bio(data_b2)
submit_bio(commit_b3, FLUSH|FUA)  /* journal commit record */
wait_for_bio(commit_b3)

But there is no guarantee that data_b1 and data_b2 have already
completed; they can still be in the pending list. In case of power
loss we get a good commit record referencing b1 and b2, but b1 and
b2 were never flushed, which results in exposing uninitialized data.
In fact ext4/jbd2 waits for b1 and b2 first and only then submits b3,
so ext4 works fine.

Otherwise looks good.

[Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine

2016-04-29 Thread Maxim Patlasov
When the ploop state-machine looks at a preq for the first time, it
suspends the preq if its cluster-block matches pbd->ppb_map (initially
a copy of the CBT mask). To suspend the preq we simply put it into
pbd->pending_tree and plo->lockout_tree.

Later, when userspace reports that out-of-band processing is done, we
set the PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq
and wake up the ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit
lets the state-machine know that the given preq is OK and that we
shouldn't suspend further preq-s for the given cluster-block anymore.

Signed-off-by: Maxim Patlasov 
---
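Note: the userspace side of the protocol is expected to look roughly
like the loop below. This is an illustrative sketch only: PB_IO_GET,
PB_IO_DONE, struct pb_ctl and backup_old_data() are placeholder names,
not the ioctl ABI defined by this series.

	for (;;) {
		struct pb_ctl ctl;	/* placeholder ctl struct */

		/* block until the kernel suspends a preq on a cluster
		 * whose bit is still set in ppb_map */
		if (ioctl(ploop_fd, PB_IO_GET /* placeholder */, &ctl) < 0)
			break;

		/* out-of-band processing: back up the old data */
		backup_old_data(ctl.clu);

		/* report completion: the kernel sets PLOOP_REQ_PUSH_BACKUP,
		 * re-schedules the preq and clears the ppb_map bit */
		ioctl(ploop_fd, PB_IO_DONE /* placeholder */, &ctl);
	}
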
 drivers/block/ploop/dev.c         |   32 +++
 drivers/block/ploop/push_backup.c |   62 +
 drivers/block/ploop/push_backup.h |    6 
 include/linux/ploop/ploop.h       |    1 +
 4 files changed, 101 insertions(+)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 2a77d2e..c7cc385 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2021,6 +2021,38 @@ restart:
return;
}
 
+   /* push_backup special processing */
+   if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
+   (preq->req_rw & REQ_WRITE) && preq->req_size &&
+   ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
+   if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
+   /* already reported by userspace push_backup */
+   ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
+   } else {
+   spin_lock_irq(&plo->lock);
+   ploop_add_lockout(preq, 0);
+   spin_unlock_irq(&plo->lock);
+   /*
+* preq IN: preq is in ppb_pending tree waiting for
+* out-of-band push_backup processing by userspace ...
+*/
+   return;
+   }
+   } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
+  test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
+   /*
+* preq OUT: out-of-band push_backup processing by
+* userspace done; preq was re-scheduled
+*/
+   ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
+
+   spin_lock_irq(&plo->lock);
+   del_lockout(preq);
+   if (!list_empty(&preq->delay_list))
+   list_splice_init(&preq->delay_list, plo->ready_queue.prev);
+   spin_unlock_irq(&plo->lock);
+   }
+
if (plo->trans_map) {
err = ploop_find_trans_map(plo->trans_map, preq);
if (err) {
diff --git a/drivers/block/ploop/push_backup.c b/drivers/block/ploop/push_backup.c
index 477caf7..488b8fb 100644
--- a/drivers/block/ploop/push_backup.c
+++ b/drivers/block/ploop/push_backup.c
@@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 map_max, u64 blk)
do_bit_in_map(map, map_max, blk, SET_BIT);
 }
 
+static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk)
+{
+   do_bit_in_map(map, map_max, blk, CLEAR_BIT);
+}
+
+static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk)
+{
+   return do_bit_in_map(map, map_max, blk, CHECK_BIT);
+}
+
+/* intentionally lockless */
+void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
+{
+   BUG_ON(!pbd);
+   clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
+}
+
+/* intentionally lockless */
+bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
+{
+   if (!pbd)
+   return false;
+
+   return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
+}
+
 static int convert_map_to_map(struct ploop_pushbackup_desc *pbd)
 {
struct page **from_map = pbd->cbt_map;
@@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct ploop_request *preq,
rb_insert_color(&preq->reloc_link, tree);
 }
 
+static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd,
+   struct ploop_request *preq)
+{
+   ploop_pb_add_req_to_tree(preq, &pbd->pending_tree);
+}
+
 static void ploop_pb_add_req_to_reported(struct ploop_pushbackup_desc *pbd,
 struct ploop_request *preq)
 {
@@ -339,6 +371,33 @@ ploop_pb_get_req_from_reported(struct ploop_pushbackup_desc *pbd,
return ploop_pb_get_req_from_tree(&pbd->reported_tree, clu);
 }
 
+int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
+  struct ploop_request *preq)
+{
+   BUG_ON(!pbd);
+
+   spin_lock(&pbd->ppb_lock);
+
+   if (!test_bit(PLOOP_S_PUSH_BACKUP, &pbd->plo->state)) {
+   spin_unlock(&pbd->ppb_lock);
+   return -EINTR;
+   }
+
+   /* if (preq matches pbd->reported_map) return -EALREADY; */
+   if (preq->req_cluster < pbd->p