Maxim Patlasov <mpatla...@virtuozzo.com> writes: I cannot shake the worry that this request juggling completely breaks the FS barrier assumptions.
For example, the fs does: submit_bio(data_b1), submit_bio(data_b2), submit_bio(commit_b3, FLUSH|FUA) (the journal commit record), wait_for_bio(commit_b3). But there is no guarantee that data_b1 and data_b2 have completed already. They can be in the pending list. In case of power loss we have a good commit record which references b1 and b2, but b1 and b2 were not flushed, which results in exposure of uninitialized data. In fact ext4/jbd2 will wait for b1 and b2 first, and only after that will it submit b3, so ext4 will work fine. Otherwise looks good. > When ploop state-machine looks at preq first time, it suspends the preq if > its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially. > To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree. > > Later, when userspace reports that out-of-band processing is done, we > set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and > wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine > know that given preq is OK and we shouldn't suspend further preq-s for > given cluster-block anymore. 
> > Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com> > --- > drivers/block/ploop/dev.c | 32 +++++++++++++++++++ > drivers/block/ploop/push_backup.c | 62 > +++++++++++++++++++++++++++++++++++++ > drivers/block/ploop/push_backup.h | 6 ++++ > include/linux/ploop/ploop.h | 1 + > 4 files changed, 101 insertions(+) > > diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c > index 2a77d2e..c7cc385 100644 > --- a/drivers/block/ploop/dev.c > +++ b/drivers/block/ploop/dev.c > @@ -2021,6 +2021,38 @@ restart: > return; > } > > + /* push_backup special processing */ > + if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && > + (preq->req_rw & REQ_WRITE) && preq->req_size && > + ploop_pb_check_bit(plo->pbd, preq->req_cluster)) { > + if (ploop_pb_preq_add_pending(plo->pbd, preq)) { > + /* already reported by userspace push_backup */ > + ploop_pb_clear_bit(plo->pbd, preq->req_cluster); > + } else { > + spin_lock_irq(&plo->lock); > + ploop_add_lockout(preq, 0); > + spin_unlock_irq(&plo->lock); > + /* > + * preq IN: preq is in ppb_pending tree waiting for > + * out-of-band push_backup processing by userspace ... 
> + */ > + return; > + } > + } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) && > + test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) { > + /* > + * preq OUT: out-of-band push_backup processing by > + * userspace done; preq was re-scheduled > + */ > + ploop_pb_clear_bit(plo->pbd, preq->req_cluster); > + > + spin_lock_irq(&plo->lock); > + del_lockout(preq); > + if (!list_empty(&preq->delay_list)) > + list_splice_init(&preq->delay_list, > plo->ready_queue.prev); > + spin_unlock_irq(&plo->lock); > + } > + > if (plo->trans_map) { > err = ploop_find_trans_map(plo->trans_map, preq); > if (err) { > diff --git a/drivers/block/ploop/push_backup.c > b/drivers/block/ploop/push_backup.c > index 477caf7..488b8fb 100644 > --- a/drivers/block/ploop/push_backup.c > +++ b/drivers/block/ploop/push_backup.c > @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 > map_max, u64 blk) > do_bit_in_map(map, map_max, blk, SET_BIT); > } > > +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk) > +{ > + do_bit_in_map(map, map_max, blk, CLEAR_BIT); > +} > + > +static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk) > +{ > + return do_bit_in_map(map, map_max, blk, CHECK_BIT); > +} > + > +/* intentionally lockless */ > +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu) > +{ > + BUG_ON(!pbd); > + clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu); > +} > + > +/* intentionally lockless */ > +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu) > +{ > + if (!pbd) > + return false; > + > + return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu); > +} > + > static int convert_map_to_map(struct ploop_pushbackup_desc *pbd) > { > struct page **from_map = pbd->cbt_map; > @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct > ploop_request *preq, > rb_insert_color(&preq->reloc_link, tree); > } > > +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd, > + 
struct ploop_request *preq) > +{ > + ploop_pb_add_req_to_tree(preq, &pbd->pending_tree); > +} > + > static void ploop_pb_add_req_to_reported(struct ploop_pushbackup_desc *pbd, > struct ploop_request *preq) > { > @@ -339,6 +371,33 @@ ploop_pb_get_req_from_reported(struct > ploop_pushbackup_desc *pbd, > return ploop_pb_get_req_from_tree(&pbd->reported_tree, clu); > } > > +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd, > + struct ploop_request *preq) > +{ > + BUG_ON(!pbd); > + > + spin_lock(&pbd->ppb_lock); > + > + if (!test_bit(PLOOP_S_PUSH_BACKUP, &pbd->plo->state)) { > + spin_unlock(&pbd->ppb_lock); > + return -EINTR; > + } > + > + /* if (preq matches pbd->reported_map) return -EALREADY; */ > + if (preq->req_cluster < pbd->ppb_offset) { > + spin_unlock(&pbd->ppb_lock); > + return -EALREADY; > + } > + > + ploop_pb_add_req_to_pending(pbd, preq); > + > + if (pbd->ppb_waiting) > + complete(&pbd->ppb_comp); > + > + spin_unlock(&pbd->ppb_lock); > + return 0; > +} > + > unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd) > { > if (pbd == NULL) > @@ -428,6 +487,9 @@ void ploop_pb_put_reported(struct ploop_pushbackup_desc > *pbd, > else > n_found++; > > + if (preq) > + __set_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state); > + > /* > * If preq not found above, it's unsolicited report. 
Then it's > * enough to have corresponding bit set in reported_map because if > diff --git a/drivers/block/ploop/push_backup.h > b/drivers/block/ploop/push_backup.h > index 482e070..476ac53 100644 > --- a/drivers/block/ploop/push_backup.h > +++ b/drivers/block/ploop/push_backup.h > @@ -11,3 +11,9 @@ int ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd, > cluster_t *clu_p, cluster_t *len_p, unsigned n_done); > void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd, > cluster_t clu, cluster_t len); > + > +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu); > +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu); > + > +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd, > + struct ploop_request *preq); > diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h > index 09f419d3..762d2fd 100644 > --- a/include/linux/ploop/ploop.h > +++ b/include/linux/ploop/ploop.h > @@ -464,6 +464,7 @@ enum > PLOOP_REQ_FORCE_FLUSH, /*force flush by engine */ > PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */ > PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */ > + PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */ > }; > > enum
signature.asc
Description: PGP signature
_______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel