Maxim Patlasov <mpatla...@virtuozzo.com> writes: > Once we converted extent to initialized it can be part of uncompleted > journal transaction, so we have to force transaction commit at some point. > > Instead of forcing transaction commit immediately, the patch delays it > until an incoming bio with FLUSH|FUA arrives. Then, as the very first > step of processing such a bio, we sends corresponding preq to fsync_thread > to perform f_op->fsync(). > > As a very unlikely case, it is also possible that processing a FLUSH|FUA > bio itself results in converting extents. Then, the patch calls f_op->fsync() > immediately after conversion to preserve FUA semantics. ACK. With minor comments. See below: > > https://jira.sw.ru/browse/PSBM-47026 > > Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com> > --- > drivers/block/ploop/dev.c | 70 > ++++++++++++++++++++++++--------------- > drivers/block/ploop/io_direct.c | 28 +++++++++++++++- > include/linux/ploop/ploop.h | 6 +++ > 3 files changed, 76 insertions(+), 28 deletions(-) > > diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c > index 654b60b..03fc289 100644 > --- a/drivers/block/ploop/dev.c > +++ b/drivers/block/ploop/dev.c > @@ -1942,46 +1942,62 @@ err: > > /* Main preq state machine */ > > +static inline bool preq_is_special(struct ploop_request * preq) > +{ > + return test_bit(PLOOP_REQ_MERGE, &preq->state) || > + test_bit(PLOOP_REQ_RELOC_A, &preq->state) || > + test_bit(PLOOP_REQ_RELOC_S, &preq->state) || > + test_bit(PLOOP_REQ_DISCARD, &preq->state) || > + test_bit(PLOOP_REQ_ZERO, &preq->state); Oh. It looks awful. Please use one atomic read here and other places. #define PLOOP_REQ_RELOC_A_FL (1 << PLOOP_REQ_RELOC_A) #define PLOOP_REQ_RELOC_S_FL (1 << PLOOP_REQ_RELOC_S)
unsigned long state = READ_ONCE(preq->state); ... if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL) ... > +} > + > static void > ploop_entry_request(struct ploop_request * preq) > { > struct ploop_device * plo = preq->plo; > struct ploop_delta * top_delta = ploop_top_delta(plo); > + struct ploop_io * top_io = &top_delta->io; > struct ploop_delta * delta; > int level; > int err; > iblock_t iblk; > > - /* Control request. */ > - if (unlikely(preq->bl.head == NULL && > - !test_bit(PLOOP_REQ_MERGE, &preq->state) && > - !test_bit(PLOOP_REQ_RELOC_A, &preq->state) && > - !test_bit(PLOOP_REQ_RELOC_S, &preq->state) && > - !test_bit(PLOOP_REQ_DISCARD, &preq->state) && > - !test_bit(PLOOP_REQ_ZERO, &preq->state))) { > - complete(plo->quiesce_comp); > - wait_for_completion(&plo->relax_comp); > - ploop_complete_request(preq); > - complete(&plo->relaxed_comp); > - return; > - } > + if (!preq_is_special(preq)) { > + /* Control request */ > + if (unlikely(preq->bl.head == NULL)) { > + complete(plo->quiesce_comp); > + wait_for_completion(&plo->relax_comp); > + ploop_complete_request(preq); > + complete(&plo->relaxed_comp); > + return; > + } > > - /* Empty flush. 
*/ > - if (unlikely(preq->req_size == 0 && > - !test_bit(PLOOP_REQ_MERGE, &preq->state) && > - !test_bit(PLOOP_REQ_RELOC_A, &preq->state) && > - !test_bit(PLOOP_REQ_RELOC_S, &preq->state) && > - !test_bit(PLOOP_REQ_ZERO, &preq->state))) { > - if (preq->req_rw & REQ_FLUSH) { > - if (top_delta->io.ops->issue_flush) { > - top_delta->io.ops->issue_flush(&top_delta->io, > preq); > - return; > - } > + /* Need to fsync before start handling FLUSH */ > + if ((preq->req_rw & REQ_FLUSH) && > + test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state) && > + !test_bit(PLOOP_REQ_FSYNC_DONE, &preq->state)) { > + spin_lock_irq(&plo->lock); > + list_add_tail(&preq->list, &top_io->fsync_queue); > + if (waitqueue_active(&top_io->fsync_waitq)) > + wake_up_interruptible(&top_io->fsync_waitq); > + spin_unlock_irq(&plo->lock); > + return; > } > > - preq->eng_state = PLOOP_E_COMPLETE; > - ploop_complete_request(preq); > - return; > + /* Empty flush or unknown zero-size request */ Do you know any zero size requests instead of FLUSH? > + if (preq->req_size == 0) { > + if (preq->req_rw & REQ_FLUSH && > + !test_bit(PLOOP_REQ_FSYNC_DONE, &preq->state)) { > + if (top_io->ops->issue_flush) { > + top_io->ops->issue_flush(top_io, preq); > + return; > + } > + } > + > + preq->eng_state = PLOOP_E_COMPLETE; > + ploop_complete_request(preq); > + return; > + } > } > > if (unlikely(test_bit(PLOOP_REQ_SYNC, &preq->state) && > diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c > index 8096110..a0b7a4e 100644 > --- a/drivers/block/ploop/io_direct.c > +++ b/drivers/block/ploop/io_direct.c > @@ -514,17 +514,32 @@ end_write: > static void > dio_post_submit(struct ploop_io *io, struct ploop_request * preq) > { > + struct ploop_device *plo = preq->plo; > sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log; > loff_t clu_siz = 1 << (preq->plo->cluster_log + 9); > int err; > > file_start_write(io->files.file); > + > + /* Here io->io_count is even ... 
*/ > + spin_lock_irq(&plo->lock); > + io->io_count++; > + set_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state); > + spin_unlock_irq(&plo->lock); > + > err = io->files.file->f_op->fallocate(io->files.file, > FALLOC_FL_CONVERT_UNWRITTEN, > (loff_t)sec << 9, clu_siz); > - if (!err) > + > + /* highly unlikely case: FUA coming to a block not provisioned yet */ > + if (!err && (preq->req_rw & REQ_FUA)) > err = io->ops->sync(io); > > + spin_lock_irq(&plo->lock); > + io->io_count++; > + spin_unlock_irq(&plo->lock); Double entrance is not possible because ->post_submit is called synchronously from ploop_thread(). Am I correct? > + /* and here io->io_count is even (+2) again. */ > + > file_end_write(io->files.file); > if (err) { > PLOOP_REQ_SET_ERROR(preq, err); > @@ -782,6 +797,7 @@ static int dio_fsync_thread(void * data) > { > struct ploop_io * io = data; > struct ploop_device * plo = io->plo; > + u64 io_count; > > set_user_nice(current, -20); > > @@ -808,6 +824,7 @@ static int dio_fsync_thread(void * data) > > INIT_LIST_HEAD(&list); > list_splice_init(&io->fsync_queue, &list); > + io_count = io->io_count; > spin_unlock_irq(&plo->lock); > > /* filemap_fdatawrite() has been made already */ > @@ -824,12 +841,17 @@ static int dio_fsync_thread(void * data) > > spin_lock_irq(&plo->lock); > > + if (io_count == io->io_count && !(io_count & 1)) > + clear_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state); > + > while (!list_empty(&list)) { > struct ploop_request * preq; > preq = list_entry(list.next, struct ploop_request, > list); > list_del(&preq->list); > if (err) > PLOOP_REQ_SET_ERROR(preq, err); > + > + __set_bit(PLOOP_REQ_FSYNC_DONE, &preq->state); > list_add_tail(&preq->list, &plo->ready_queue); > io->fsync_qlen--; > } > @@ -1522,6 +1544,10 @@ dio_fastmap(struct ploop_io * io, struct bio * > orig_bio, > struct extent_map * em; > int i; > > + if (unlikely((orig_bio->bi_rw & REQ_FLUSH) && blk layer implicitly treats REQ_FUA as REQ_FUA + later FLUSH. 
Let's do the same: + if (unlikely((orig_bio->bi_rw & (REQ_FUA|REQ_FLUSH)) && > + test_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state))) > + return 1; > + > if (orig_bio->bi_size == 0) { > bio->bi_vcnt = 0; > bio->bi_sector = 0; > diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h > index ad36a91..49f3cda 100644 > --- a/include/linux/ploop/ploop.h > +++ b/include/linux/ploop/ploop.h > @@ -87,6 +87,9 @@ struct ploop_file > * This struct describes how we do real IO on particular backing file. > */ > > +enum { > + PLOOP_IO_FSYNC_DELAYED, /* Must f_op->fsync before FLUSH|FUA */ > +}; > > struct ploop_io > { > @@ -108,6 +111,8 @@ struct ploop_io > struct timer_list fsync_timer; > > struct ploop_io_ops *ops; > + unsigned long io_state; > + u64 io_count; > }; > > struct ploop_io_ops > @@ -466,6 +471,7 @@ enum > PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */ > PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */ > PLOOP_REQ_ALLOW_READS, /* READs are allowed for given req_cluster */ > + PLOOP_REQ_FSYNC_DONE, /* fsync_thread() performed f_op->fsync() */ > }; > > enum
signature.asc
Description: PGP signature
_______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel