On 04/30/2016 10:00 AM, Dmitry Monakhov wrote:
Maxim Patlasov <mpatla...@virtuozzo.com> writes:

I can't shake the concern that this request juggling completely breaks
the FS barrier assumptions.

For example, the fs does:
submit_bio(data_b1)
submit_bio(data_b2)
submit_bio(commit_b3, FLUSH|FUA)   /* journal commit record */
wait_for_bio(commit_b3)
But there is no guarantee that data_b1 and data_b2 have already completed.
They can still be in the pending list. In case of power loss we end up with
a good commit record which references b1 and b2, while b1 and b2 were never
flushed, which exposes uninitialized data.
In fact ext4/jbd2 waits for b1 and b2 first and only after that submits b3,
so ext4 will work fine.

Any code assuming that completion of a FLUSH|FUA bio guarantees anything about already-submitted-but-not-yet-completed bios is broken.
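
To spell out that ordering, here is a minimal sketch (not part of the patch) of the pattern jbd2 actually follows. It assumes the old two-argument submit_bio(rw, bio) interface this thread is written against; data_b1/data_b2/commit_b3 are schematic bios, and wait_for_data_bios()/wait_for_bio() are hypothetical stand-ins for the caller's own completion tracking:

    /*
     * Sketch only: the caller has to wait for its data bios itself
     * before issuing the FLUSH|FUA commit record.  Completion of the
     * flush says nothing about bios submitted earlier that are still
     * pending (e.g. sitting on a driver's pending list).
     */
    submit_bio(WRITE, data_b1);
    submit_bio(WRITE, data_b2);

    wait_for_data_bios();                    /* hypothetical: wait for b1 and b2 */

    submit_bio(WRITE_FLUSH_FUA, commit_b3);  /* journal commit record */
    wait_for_bio(commit_b3);                 /* hypothetical: wait for the commit */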


Otherwise looks good.

When the ploop state machine looks at a preq for the first time, it suspends
the preq if its cluster-block matches pbd->ppb_map (initially a copy of the
CBT mask). To suspend the preq we simply put it into pbd->pending_tree and
plo->lockout_tree.

Later, when userspace reports that out-of-band processing is done, we set the
PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and wake up the
ploop state machine. The PLOOP_REQ_PUSH_BACKUP bit lets the state machine know
that the given preq is OK and that we shouldn't suspend further preq-s for
that cluster-block anymore.
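
In condensed form (just a restatement of the dev.c hunk below using the same names, not additional code), the new path through the state machine is roughly:

    /* preq IN: first write to a cluster whose bit is still set in ppb_map */
    if (!lockout && write && ploop_pb_check_bit(pbd, preq->req_cluster)) {
            if (ploop_pb_preq_add_pending(pbd, preq))
                    ploop_pb_clear_bit(pbd, preq->req_cluster); /* already reported */
            else {
                    ploop_add_lockout(preq, 0);
                    return;             /* suspended until userspace reports it */
            }
    /* preq OUT: userspace reported the cluster, preq was re-scheduled */
    } else if (lockout && test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
            ploop_pb_clear_bit(pbd, preq->req_cluster);
            del_lockout(preq);          /* and release any delayed preq-s */
    }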

Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com>
---
  drivers/block/ploop/dev.c         |   32 +++++++++++++++++++
  drivers/block/ploop/push_backup.c |   62 +++++++++++++++++++++++++++++++++++++
  drivers/block/ploop/push_backup.h |    6 ++++
  include/linux/ploop/ploop.h       |    1 +
  4 files changed, 101 insertions(+)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 2a77d2e..c7cc385 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2021,6 +2021,38 @@ restart:
                return;
        }

+       /* push_backup special processing */
+       if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
+           (preq->req_rw & REQ_WRITE) && preq->req_size &&
+           ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
+               if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
+                       /* already reported by userspace push_backup */
+                       ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
+               } else {
+                       spin_lock_irq(&plo->lock);
+                       ploop_add_lockout(preq, 0);
+                       spin_unlock_irq(&plo->lock);
+                       /*
+                        * preq IN: preq is in ppb_pending tree waiting for
+                        * out-of-band push_backup processing by userspace ...
+                        */
+                       return;
+               }
+       } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
+                  test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
+               /*
+                * preq OUT: out-of-band push_backup processing by
+                * userspace done; preq was re-scheduled
+                */
+               ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
+
+               spin_lock_irq(&plo->lock);
+               del_lockout(preq);
+               if (!list_empty(&preq->delay_list))
+                       list_splice_init(&preq->delay_list, plo->ready_queue.prev);
+               spin_unlock_irq(&plo->lock);
+       }
+
        if (plo->trans_map) {
                err = ploop_find_trans_map(plo->trans_map, preq);
                if (err) {
diff --git a/drivers/block/ploop/push_backup.c b/drivers/block/ploop/push_backup.c
index 477caf7..488b8fb 100644
--- a/drivers/block/ploop/push_backup.c
+++ b/drivers/block/ploop/push_backup.c
@@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 map_max, u64 blk)
        do_bit_in_map(map, map_max, blk, SET_BIT);
  }

+static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk)
+{
+       do_bit_in_map(map, map_max, blk, CLEAR_BIT);
+}
+
+static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk)
+{
+       return do_bit_in_map(map, map_max, blk, CHECK_BIT);
+}
+
+/* intentionally lockless */
+void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
+{
+       BUG_ON(!pbd);
+       clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
+}
+
+/* intentionally lockless */
+bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
+{
+       if (!pbd)
+               return false;
+
+       return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
+}
+
  static int convert_map_to_map(struct ploop_pushbackup_desc *pbd)
  {
        struct page **from_map = pbd->cbt_map;
@@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct ploop_request *preq,
        rb_insert_color(&preq->reloc_link, tree);
  }

+static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd,
+                                       struct ploop_request *preq)
+{
+       ploop_pb_add_req_to_tree(preq, &pbd->pending_tree);
+}
+
  static void ploop_pb_add_req_to_reported(struct ploop_pushbackup_desc *pbd,
                                         struct ploop_request *preq)
  {
@@ -339,6 +371,33 @@ ploop_pb_get_req_from_reported(struct ploop_pushbackup_desc *pbd,
        return ploop_pb_get_req_from_tree(&pbd->reported_tree, clu);
  }

+int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
+                              struct ploop_request *preq)
+{
+       BUG_ON(!pbd);
+
+       spin_lock(&pbd->ppb_lock);
+
+       if (!test_bit(PLOOP_S_PUSH_BACKUP, &pbd->plo->state)) {
+               spin_unlock(&pbd->ppb_lock);
+               return -EINTR;
+       }
+
+       /* if (preq matches pbd->reported_map) return -EALREADY; */
+       if (preq->req_cluster < pbd->ppb_offset) {
+               spin_unlock(&pbd->ppb_lock);
+               return -EALREADY;
+       }
+
+       ploop_pb_add_req_to_pending(pbd, preq);
+
+       if (pbd->ppb_waiting)
+               complete(&pbd->ppb_comp);
+
+       spin_unlock(&pbd->ppb_lock);
+       return 0;
+}
+
  unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd)
  {
        if (pbd == NULL)
@@ -428,6 +487,9 @@ void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
        else
                n_found++;

+       if (preq)
+               __set_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state);
+
        /*
         * If preq not found above, it's unsolicited report. Then it's
         * enough to have corresponding bit set in reported_map because if
diff --git a/drivers/block/ploop/push_backup.h b/drivers/block/ploop/push_backup.h
index 482e070..476ac53 100644
--- a/drivers/block/ploop/push_backup.h
+++ b/drivers/block/ploop/push_backup.h
@@ -11,3 +11,9 @@ int ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd,
                         cluster_t *clu_p, cluster_t *len_p, unsigned n_done);
  void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
                           cluster_t clu, cluster_t len);
+
+void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
+bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
+
+int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
+                              struct ploop_request *preq);
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index 09f419d3..762d2fd 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -464,6 +464,7 @@ enum
        PLOOP_REQ_FORCE_FLUSH,  /*force flush by engine */
        PLOOP_REQ_KAIO_FSYNC,   /*force image fsync by KAIO module */
        PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
+       PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
  };

enum

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel
