Ack.

Thanks for this I was under the mistaken impression that FUA requests got split 
by core dm into separate payload and PREFLUSH requests.

I've audited dm-cache and that looks ok.

How did you test this patch?  That missing bio_list_init() in V1 must
have caused memory corruption?

- Joe


On Fri, Feb 15, 2019 at 01:21:38AM +0200, Nikos Tsironis wrote:
> When provisioning a new data block for a virtual block, either because
> the block was previously unallocated or because we are breaking sharing,
> if the whole block of data is being overwritten the bio that triggered
> the provisioning is issued immediately, skipping copying or zeroing of
> the data block.
> 
> When this bio completes the new mapping is inserted in to the pool's
> metadata by process_prepared_mapping(), where the bio completion is
> signaled to the upper layers.
> 
> This completion is signaled without first committing the metadata. If
> the bio in question has the REQ_FUA flag set and the system crashes
> right after its completion and before the next metadata commit, then the
> write is lost despite the REQ_FUA flag requiring that I/O completion for
> this request is only signaled after the data has been committed to
> non-volatile storage.
> 
> Fix this by deferring the completion of overwrite bios, with the REQ_FUA
> flag set, after the metadata has been committed.
> 
> Signed-off-by: Nikos Tsironis <ntsiro...@arrikto.com>
> ---
>  drivers/md/dm-thin.c | 55 
> +++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 50 insertions(+), 5 deletions(-)
> 
> Changes in v2:
>   - Add missing bio_list_init() in pool_create()
> 
> v1: https://www.redhat.com/archives/dm-devel/2019-February/msg00064.html
> 
> diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> index ca8af21bf644..e83b63608262 100644
> --- a/drivers/md/dm-thin.c
> +++ b/drivers/md/dm-thin.c
> @@ -257,6 +257,7 @@ struct pool {
>  
>       spinlock_t lock;
>       struct bio_list deferred_flush_bios;
> +     struct bio_list deferred_flush_completions;
>       struct list_head prepared_mappings;
>       struct list_head prepared_discards;
>       struct list_head prepared_discards_pt2;
> @@ -956,6 +957,39 @@ static void process_prepared_mapping_fail(struct 
> dm_thin_new_mapping *m)
>       mempool_free(m, &m->tc->pool->mapping_pool);
>  }
>  
> +static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
> +{
> +     struct pool *pool = tc->pool;
> +     unsigned long flags;
> +
> +     /*
> +      * If the bio has the REQ_FUA flag set we must commit the metadata
> +      * before signaling its completion.
> +      */
> +     if (!bio_triggers_commit(tc, bio)) {
> +             bio_endio(bio);
> +             return;
> +     }
> +
> +     /*
> +      * Complete bio with an error if earlier I/O caused changes to the
> +      * metadata that can't be committed, e.g, due to I/O errors on the
> +      * metadata device.
> +      */
> +     if (dm_thin_aborted_changes(tc->td)) {
> +             bio_io_error(bio);
> +             return;
> +     }
> +
> +     /*
> +      * Batch together any bios that trigger commits and then issue a
> +      * single commit for them in process_deferred_bios().
> +      */
> +     spin_lock_irqsave(&pool->lock, flags);
> +     bio_list_add(&pool->deferred_flush_completions, bio);
> +     spin_unlock_irqrestore(&pool->lock, flags);
> +}
> +
>  static void process_prepared_mapping(struct dm_thin_new_mapping *m)
>  {
>       struct thin_c *tc = m->tc;
> @@ -988,7 +1022,7 @@ static void process_prepared_mapping(struct 
> dm_thin_new_mapping *m)
>        */
>       if (bio) {
>               inc_remap_and_issue_cell(tc, m->cell, m->data_block);
> -             bio_endio(bio);
> +             complete_overwrite_bio(tc, bio);
>       } else {
>               inc_all_io_entry(tc->pool, m->cell->holder);
>               remap_and_issue(tc, m->cell->holder, m->data_block);
> @@ -2317,7 +2351,7 @@ static void process_deferred_bios(struct pool *pool)
>  {
>       unsigned long flags;
>       struct bio *bio;
> -     struct bio_list bios;
> +     struct bio_list bios, bio_completions;
>       struct thin_c *tc;
>  
>       tc = get_first_thin(pool);
> @@ -2328,26 +2362,36 @@ static void process_deferred_bios(struct pool *pool)
>       }
>  
>       /*
> -      * If there are any deferred flush bios, we must commit
> -      * the metadata before issuing them.
> +      * If there are any deferred flush bios, we must commit the metadata
> +      * before issuing them or signaling their completion.
>        */
>       bio_list_init(&bios);
> +     bio_list_init(&bio_completions);
> +
>       spin_lock_irqsave(&pool->lock, flags);
>       bio_list_merge(&bios, &pool->deferred_flush_bios);
>       bio_list_init(&pool->deferred_flush_bios);
> +
> +     bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
> +     bio_list_init(&pool->deferred_flush_completions);
>       spin_unlock_irqrestore(&pool->lock, flags);
>  
> -     if (bio_list_empty(&bios) &&
> +     if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
>           !(dm_pool_changed_this_transaction(pool->pmd) && 
> need_commit_due_to_time(pool)))
>               return;
>  
>       if (commit(pool)) {
> +             bio_list_merge(&bios, &bio_completions);
> +
>               while ((bio = bio_list_pop(&bios)))
>                       bio_io_error(bio);
>               return;
>       }
>       pool->last_commit_jiffies = jiffies;
>  
> +     while ((bio = bio_list_pop(&bio_completions)))
> +             bio_endio(bio);
> +
>       while ((bio = bio_list_pop(&bios)))
>               generic_make_request(bio);
>  }
> @@ -2954,6 +2998,7 @@ static struct pool *pool_create(struct mapped_device 
> *pool_md,
>       INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
>       spin_lock_init(&pool->lock);
>       bio_list_init(&pool->deferred_flush_bios);
> +     bio_list_init(&pool->deferred_flush_completions);
>       INIT_LIST_HEAD(&pool->prepared_mappings);
>       INIT_LIST_HEAD(&pool->prepared_discards);
>       INIT_LIST_HEAD(&pool->prepared_discards_pt2);
> -- 
> 2.11.0
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Reply via email to