Re: [Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine
On 04/30/2016 10:00 AM, Dmitry Monakhov wrote: Maxim Patlasov writes: I cannot avoid the obsession that this request juggling fully destroys FS barrier assumptions. For example: fs does submit_bio(data_b1) submit_bio(data_b2) submit_bio(commit_b3, FLUSH|FUA) journal commit record wait_for_bio(commit_b3) But there is no guarantee that data_b1 and data_b2 were completed already. They can be in the pending list. In case of power-loss we have a good commit record which references b1 and b2, but b1 and b2 were not flushed, which results in exposure of uninitialized data. In fact ext4/jbd2 will wait for b1 and b2 first and only after that will it submit b3, so ext4 will work fine. Any code assuming that completion of FLUSH|FUA guarantees something about already-submitted-but-not-yet-completed bio-s is broken. Otherwise looks good. When ploop state-machine looks at preq first time, it suspends the preq if its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially. To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree. Later, when userspace reports that out-of-band processing is done, we set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine know that given preq is OK and we shouldn't suspend further preq-s for given cluster-block anymore. 
Signed-off-by: Maxim Patlasov --- drivers/block/ploop/dev.c | 32 +++ drivers/block/ploop/push_backup.c | 62 + drivers/block/ploop/push_backup.h |6 include/linux/ploop/ploop.h |1 + 4 files changed, 101 insertions(+) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 2a77d2e..c7cc385 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -2021,6 +2021,38 @@ restart: return; } + /* push_backup special processing */ + if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && + (preq->req_rw & REQ_WRITE) && preq->req_size && + ploop_pb_check_bit(plo->pbd, preq->req_cluster)) { + if (ploop_pb_preq_add_pending(plo->pbd, preq)) { + /* already reported by userspace push_backup */ + ploop_pb_clear_bit(plo->pbd, preq->req_cluster); + } else { + spin_lock_irq(&plo->lock); + ploop_add_lockout(preq, 0); + spin_unlock_irq(&plo->lock); + /* +* preq IN: preq is in ppb_pending tree waiting for +* out-of-band push_backup processing by userspace ... +*/ + return; + } + } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && + test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) { + /* +* preq OUT: out-of-band push_backup processing by +* userspace done; preq was re-scheduled +*/ + ploop_pb_clear_bit(plo->pbd, preq->req_cluster); + + spin_lock_irq(&plo->lock); + del_lockout(preq); + if (!list_empty(&preq->delay_list)) + list_splice_init(&preq->delay_list, plo->ready_queue.prev); + spin_unlock_irq(&plo->lock); + } + if (plo->trans_map) { err = ploop_find_trans_map(plo->trans_map, preq); if (err) { diff --git a/drivers/block/ploop/push_backup.c b/drivers/block/ploop/push_backup.c index 477caf7..488b8fb 100644 --- a/drivers/block/ploop/push_backup.c +++ b/drivers/block/ploop/push_backup.c @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 map_max, u64 blk) do_bit_in_map(map, map_max, blk, SET_BIT); } +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk) +{ + do_bit_in_map(map, map_max, blk, CLEAR_BIT); +} + +static 
bool check_bit_in_map(struct page **map, u64 map_max, u64 blk) +{ + return do_bit_in_map(map, map_max, blk, CHECK_BIT); +} + +/* intentionally lockless */ +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu) +{ + BUG_ON(!pbd); + clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu); +} + +/* intentionally lockless */ +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu) +{ + if (!pbd) + return false; + + return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu); +} + static int convert_map_to_map(struct ploop_pushbackup_desc *pbd) { struct page **from_map = pbd->cbt_map; @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct ploop_request *preq, rb_insert_color(&preq->reloc_link, tree); } +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd, + struct ploop_request *pre
Re: [Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine
Maxim Patlasov writes: I cannot avoid the obsession that this request juggling fully destroys FS barrier assumptions. For example: fs does submit_bio(data_b1) submit_bio(data_b2) submit_bio(commit_b3, FLUSH|FUA) journal commit record wait_for_bio(commit_b3) But there is no guarantee that data_b1 and data_b2 were completed already. They can be in the pending list. In case of power-loss we have a good commit record which references b1 and b2, but b1 and b2 were not flushed, which results in exposure of uninitialized data. In fact ext4/jbd2 will wait for b1 and b2 first and only after that will it submit b3, so ext4 will work fine. Otherwise looks good. > When ploop state-machine looks at preq first time, it suspends the preq if > its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially. > To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree. > > Later, when userspace reports that out-of-band processing is done, we > set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and > wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine > know that given preq is OK and we shouldn't suspend further preq-s for > given cluster-block anymore. 
> > Signed-off-by: Maxim Patlasov > --- > drivers/block/ploop/dev.c | 32 +++ > drivers/block/ploop/push_backup.c | 62 > + > drivers/block/ploop/push_backup.h |6 > include/linux/ploop/ploop.h |1 + > 4 files changed, 101 insertions(+) > > diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c > index 2a77d2e..c7cc385 100644 > --- a/drivers/block/ploop/dev.c > +++ b/drivers/block/ploop/dev.c > @@ -2021,6 +2021,38 @@ restart: > return; > } > > + /* push_backup special processing */ > + if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && > + (preq->req_rw & REQ_WRITE) && preq->req_size && > + ploop_pb_check_bit(plo->pbd, preq->req_cluster)) { > + if (ploop_pb_preq_add_pending(plo->pbd, preq)) { > + /* already reported by userspace push_backup */ > + ploop_pb_clear_bit(plo->pbd, preq->req_cluster); > + } else { > + spin_lock_irq(&plo->lock); > + ploop_add_lockout(preq, 0); > + spin_unlock_irq(&plo->lock); > + /* > + * preq IN: preq is in ppb_pending tree waiting for > + * out-of-band push_backup processing by userspace ... 
> + */ > + return; > + } > + } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && > +test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) { > + /* > + * preq OUT: out-of-band push_backup processing by > + * userspace done; preq was re-scheduled > + */ > + ploop_pb_clear_bit(plo->pbd, preq->req_cluster); > + > + spin_lock_irq(&plo->lock); > + del_lockout(preq); > + if (!list_empty(&preq->delay_list)) > + list_splice_init(&preq->delay_list, > plo->ready_queue.prev); > + spin_unlock_irq(&plo->lock); > + } > + > if (plo->trans_map) { > err = ploop_find_trans_map(plo->trans_map, preq); > if (err) { > diff --git a/drivers/block/ploop/push_backup.c > b/drivers/block/ploop/push_backup.c > index 477caf7..488b8fb 100644 > --- a/drivers/block/ploop/push_backup.c > +++ b/drivers/block/ploop/push_backup.c > @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 > map_max, u64 blk) > do_bit_in_map(map, map_max, blk, SET_BIT); > } > > +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk) > +{ > + do_bit_in_map(map, map_max, blk, CLEAR_BIT); > +} > + > +static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk) > +{ > + return do_bit_in_map(map, map_max, blk, CHECK_BIT); > +} > + > +/* intentionally lockless */ > +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu) > +{ > + BUG_ON(!pbd); > + clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu); > +} > + > +/* intentionally lockless */ > +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu) > +{ > + if (!pbd) > + return false; > + > + return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu); > +} > + > static int convert_map_to_map(struct ploop_pushbackup_desc *pbd) > { > struct page **from_map = pbd->cbt_map; > @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct > ploop_request *preq, > rb_insert_color(&preq->reloc_link, tree); > } > > +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd, > + 
struct ploop_request *preq) > +{ > + ploop_pb_add_req_to_tree(preq, &pbd->pending_tree); > +
[Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine
When ploop state-machine looks at preq first time, it suspends the preq if its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially. To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree. Later, when userspace reports that out-of-band processing is done, we set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine know that given preq is OK and we shouldn't suspend further preq-s for given cluster-block anymore. Signed-off-by: Maxim Patlasov --- drivers/block/ploop/dev.c | 32 +++ drivers/block/ploop/push_backup.c | 62 + drivers/block/ploop/push_backup.h |6 include/linux/ploop/ploop.h |1 + 4 files changed, 101 insertions(+) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 2a77d2e..c7cc385 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -2021,6 +2021,38 @@ restart: return; } + /* push_backup special processing */ + if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && + (preq->req_rw & REQ_WRITE) && preq->req_size && + ploop_pb_check_bit(plo->pbd, preq->req_cluster)) { + if (ploop_pb_preq_add_pending(plo->pbd, preq)) { + /* already reported by userspace push_backup */ + ploop_pb_clear_bit(plo->pbd, preq->req_cluster); + } else { + spin_lock_irq(&plo->lock); + ploop_add_lockout(preq, 0); + spin_unlock_irq(&plo->lock); + /* +* preq IN: preq is in ppb_pending tree waiting for +* out-of-band push_backup processing by userspace ... 
+*/ + return; + } + } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && + test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) { + /* +* preq OUT: out-of-band push_backup processing by +* userspace done; preq was re-scheduled +*/ + ploop_pb_clear_bit(plo->pbd, preq->req_cluster); + + spin_lock_irq(&plo->lock); + del_lockout(preq); + if (!list_empty(&preq->delay_list)) + list_splice_init(&preq->delay_list, plo->ready_queue.prev); + spin_unlock_irq(&plo->lock); + } + if (plo->trans_map) { err = ploop_find_trans_map(plo->trans_map, preq); if (err) { diff --git a/drivers/block/ploop/push_backup.c b/drivers/block/ploop/push_backup.c index 477caf7..488b8fb 100644 --- a/drivers/block/ploop/push_backup.c +++ b/drivers/block/ploop/push_backup.c @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 map_max, u64 blk) do_bit_in_map(map, map_max, blk, SET_BIT); } +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk) +{ + do_bit_in_map(map, map_max, blk, CLEAR_BIT); +} + +static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk) +{ + return do_bit_in_map(map, map_max, blk, CHECK_BIT); +} + +/* intentionally lockless */ +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu) +{ + BUG_ON(!pbd); + clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu); +} + +/* intentionally lockless */ +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu) +{ + if (!pbd) + return false; + + return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu); +} + static int convert_map_to_map(struct ploop_pushbackup_desc *pbd) { struct page **from_map = pbd->cbt_map; @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct ploop_request *preq, rb_insert_color(&preq->reloc_link, tree); } +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd, + struct ploop_request *preq) +{ + ploop_pb_add_req_to_tree(preq, &pbd->pending_tree); +} + static void ploop_pb_add_req_to_reported(struct 
ploop_pushbackup_desc *pbd, struct ploop_request *preq) { @@ -339,6 +371,33 @@ ploop_pb_get_req_from_reported(struct ploop_pushbackup_desc *pbd, return ploop_pb_get_req_from_tree(&pbd->reported_tree, clu); } +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd, + struct ploop_request *preq) +{ + BUG_ON(!pbd); + + spin_lock(&pbd->ppb_lock); + + if (!test_bit(PLOOP_S_PUSH_BACKUP, &pbd->plo->state)) { + spin_unlock(&pbd->ppb_lock); + return -EINTR; + } + + /* if (preq matches pbd->reported_map) return -EALREADY; */ + if (preq->req_cluster < pbd->p