Maxim Patlasov <mpatla...@virtuozzo.com> writes:

> Once we converted extent to initialized it can be part of uncompleted
> journal transaction, so we have to force transaction commit at some point.
>
> Instead of forcing transaction commit immediately, the patch delays it
> until an incoming bio with FLUSH|FUA arrives. Then, as the very first
> step of processing such a bio, we send the corresponding preq to fsync_thread
> to perform f_op->fsync().
>
> As a very unlikely case, it is also possible that processing a FLUSH|FUA
> bio itself results in converting extents. Then, the patch calls f_op->fsync()
> immediately after conversion to preserve FUA semantics.
ACK. With minor comments. See below:
>
> https://jira.sw.ru/browse/PSBM-47026
>
> Signed-off-by: Maxim Patlasov <mpatla...@virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c       |   70 
> ++++++++++++++++++++++++---------------
>  drivers/block/ploop/io_direct.c |   28 +++++++++++++++-
>  include/linux/ploop/ploop.h     |    6 +++
>  3 files changed, 76 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 654b60b..03fc289 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -1942,46 +1942,62 @@ err:
>  
>  /* Main preq state machine */
>  
> +static inline bool preq_is_special(struct ploop_request * preq)
> +{
> +     return test_bit(PLOOP_REQ_MERGE, &preq->state) ||
> +             test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> +             test_bit(PLOOP_REQ_RELOC_S, &preq->state) ||
> +             test_bit(PLOOP_REQ_DISCARD, &preq->state) ||
> +             test_bit(PLOOP_REQ_ZERO, &preq->state);
Oh. It looks awful. Please use one atomic read here and in the other places.
#define PLOOP_REQ_RELOC_A_FL (1 << PLOOP_REQ_RELOC_A)
#define PLOOP_REQ_RELOC_S_FL (1 << PLOOP_REQ_RELOC_S)

unsigned long state = READ_ONCE(preq->state);
...
    if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL) ...
> +}
> +
>  static void
>  ploop_entry_request(struct ploop_request * preq)
>  {
>       struct ploop_device * plo       = preq->plo;
>       struct ploop_delta  * top_delta = ploop_top_delta(plo);
> +     struct ploop_io     * top_io    = &top_delta->io;
>       struct ploop_delta  * delta;
>       int level;
>       int err;
>       iblock_t iblk;
>  
> -     /* Control request. */
> -     if (unlikely(preq->bl.head == NULL &&
> -                  !test_bit(PLOOP_REQ_MERGE, &preq->state) &&
> -                  !test_bit(PLOOP_REQ_RELOC_A, &preq->state) &&
> -                  !test_bit(PLOOP_REQ_RELOC_S, &preq->state) &&
> -                  !test_bit(PLOOP_REQ_DISCARD, &preq->state) &&
> -                  !test_bit(PLOOP_REQ_ZERO, &preq->state))) {
> -             complete(plo->quiesce_comp);
> -             wait_for_completion(&plo->relax_comp);
> -             ploop_complete_request(preq);
> -             complete(&plo->relaxed_comp);
> -             return;
> -     }
> +     if (!preq_is_special(preq)) {
> +             /* Control request */
> +             if (unlikely(preq->bl.head == NULL)) {
> +                     complete(plo->quiesce_comp);
> +                     wait_for_completion(&plo->relax_comp);
> +                     ploop_complete_request(preq);
> +                     complete(&plo->relaxed_comp);
> +                     return;
> +             }
>  
> -     /* Empty flush. */
> -     if (unlikely(preq->req_size == 0 &&
> -                  !test_bit(PLOOP_REQ_MERGE, &preq->state) &&
> -                  !test_bit(PLOOP_REQ_RELOC_A, &preq->state) &&
> -                  !test_bit(PLOOP_REQ_RELOC_S, &preq->state) &&
> -                  !test_bit(PLOOP_REQ_ZERO, &preq->state))) {
> -             if (preq->req_rw & REQ_FLUSH) {
> -                     if (top_delta->io.ops->issue_flush) {
> -                             top_delta->io.ops->issue_flush(&top_delta->io, 
> preq);
> -                             return;
> -                     }
> +             /* Need to fsync before start handling FLUSH */
> +             if ((preq->req_rw & REQ_FLUSH) &&
> +                 test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state) &&
> +                 !test_bit(PLOOP_REQ_FSYNC_DONE, &preq->state)) {
> +                     spin_lock_irq(&plo->lock);
> +                     list_add_tail(&preq->list, &top_io->fsync_queue);
> +                     if (waitqueue_active(&top_io->fsync_waitq))
> +                             wake_up_interruptible(&top_io->fsync_waitq);
> +                     spin_unlock_irq(&plo->lock);
> +                     return;
>               }
>  
> -             preq->eng_state = PLOOP_E_COMPLETE;
> -             ploop_complete_request(preq);
> -             return;
> +             /* Empty flush or unknown zero-size request */
Do you know of any zero-size requests other than FLUSH?
> +             if (preq->req_size == 0) {
> +                     if (preq->req_rw & REQ_FLUSH &&
> +                         !test_bit(PLOOP_REQ_FSYNC_DONE, &preq->state)) {

> +                             if (top_io->ops->issue_flush) {
> +                                     top_io->ops->issue_flush(top_io, preq);
> +                                     return;
> +                             }
> +                     }
> +
> +                     preq->eng_state = PLOOP_E_COMPLETE;
> +                     ploop_complete_request(preq);
> +                     return;
> +             }
>       }
>  
>       if (unlikely(test_bit(PLOOP_REQ_SYNC, &preq->state) &&
> diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
> index 8096110..a0b7a4e 100644
> --- a/drivers/block/ploop/io_direct.c
> +++ b/drivers/block/ploop/io_direct.c
> @@ -514,17 +514,32 @@ end_write:
>  static void
>  dio_post_submit(struct ploop_io *io, struct ploop_request * preq)
>  {
> +     struct ploop_device *plo = preq->plo;
>       sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log;
>       loff_t clu_siz = 1 << (preq->plo->cluster_log + 9);
>       int err;
>  
>       file_start_write(io->files.file);
> +
> +     /* Here io->io_count is even ... */
> +     spin_lock_irq(&plo->lock);
> +     io->io_count++;
> +     set_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
> +     spin_unlock_irq(&plo->lock);
> +
>       err = io->files.file->f_op->fallocate(io->files.file,
>                                             FALLOC_FL_CONVERT_UNWRITTEN,
>                                             (loff_t)sec << 9, clu_siz);
> -     if (!err)
> +
> +     /* highly unlikely case: FUA coming to a block not provisioned yet */
> +     if (!err && (preq->req_rw & REQ_FUA))
>               err = io->ops->sync(io);
>  
> +     spin_lock_irq(&plo->lock);
> +     io->io_count++;
> +     spin_unlock_irq(&plo->lock);
        Re-entry is not possible here because ->post_submit is called
        synchronously from ploop_thread(). Am I correct?
> +     /* and here io->io_count is even (+2) again. */
> +
>       file_end_write(io->files.file);
>       if (err) {
>               PLOOP_REQ_SET_ERROR(preq, err);
> @@ -782,6 +797,7 @@ static int dio_fsync_thread(void * data)
>  {
>       struct ploop_io * io = data;
>       struct ploop_device * plo = io->plo;
> +     u64 io_count;
>  
>       set_user_nice(current, -20);
>  
> @@ -808,6 +824,7 @@ static int dio_fsync_thread(void * data)
>  
>               INIT_LIST_HEAD(&list);
>               list_splice_init(&io->fsync_queue, &list);
> +             io_count = io->io_count;
>               spin_unlock_irq(&plo->lock);
>  
>               /* filemap_fdatawrite() has been made already */
> @@ -824,12 +841,17 @@ static int dio_fsync_thread(void * data)
>  
>               spin_lock_irq(&plo->lock);
>  
> +             if (io_count == io->io_count && !(io_count & 1))
> +                     clear_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
> +
>               while (!list_empty(&list)) {
>                       struct ploop_request * preq;
>                       preq = list_entry(list.next, struct ploop_request, 
> list);
>                       list_del(&preq->list);
>                       if (err)
>                               PLOOP_REQ_SET_ERROR(preq, err);
> +
> +                     __set_bit(PLOOP_REQ_FSYNC_DONE, &preq->state);
>                       list_add_tail(&preq->list, &plo->ready_queue);
>                       io->fsync_qlen--;
>               }
> @@ -1522,6 +1544,10 @@ dio_fastmap(struct ploop_io * io, struct bio * 
> orig_bio,
>       struct extent_map * em;
>       int i;
>  
> +     if (unlikely((orig_bio->bi_rw & REQ_FLUSH) &&
The block layer implicitly treats REQ_FUA as REQ_FUA plus a later FLUSH. Let's do the same:
+       if (unlikely((orig_bio->bi_rw & (REQ_FUA|REQ_FLUSH)) &&
> +                  test_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state)))
> +             return 1;
> +
>       if (orig_bio->bi_size == 0) {
>               bio->bi_vcnt   = 0;
>               bio->bi_sector = 0;
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index ad36a91..49f3cda 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -87,6 +87,9 @@ struct ploop_file
>   * This struct describes how we do real IO on particular backing file.
>   */
>  
> +enum {
> +     PLOOP_IO_FSYNC_DELAYED,  /* Must f_op->fsync before FLUSH|FUA */
> +};
>  
>  struct ploop_io
>  {
> @@ -108,6 +111,8 @@ struct ploop_io
>       struct timer_list       fsync_timer;
>  
>       struct ploop_io_ops     *ops;
> +     unsigned long           io_state;
> +     u64                     io_count;
>  };
>  
>  struct ploop_io_ops
> @@ -466,6 +471,7 @@ enum
>       PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
>       PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
>       PLOOP_REQ_ALLOW_READS, /* READs are allowed for given req_cluster */
> +     PLOOP_REQ_FSYNC_DONE,  /* fsync_thread() performed f_op->fsync() */
>  };
>  
>  enum

Attachment: signature.asc
Description: PGP signature

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to