Ack. Thanks for this. I was under the mistaken impression that FUA requests got split by core dm into separate payload and PREFLUSH requests.
I've audited dm-cache and that looks ok. How did you test this patch? That missing bio_list_init() in V1 must have caused memory corruption? - Joe On Fri, Feb 15, 2019 at 01:21:38AM +0200, Nikos Tsironis wrote: > When provisioning a new data block for a virtual block, either because > the block was previously unallocated or because we are breaking sharing, > if the whole block of data is being overwritten the bio that triggered > the provisioning is issued immediately, skipping copying or zeroing of > the data block. > > When this bio completes the new mapping is inserted in to the pool's > metadata by process_prepared_mapping(), where the bio completion is > signaled to the upper layers. > > This completion is signaled without first committing the metadata. If > the bio in question has the REQ_FUA flag set and the system crashes > right after its completion and before the next metadata commit, then the > write is lost despite the REQ_FUA flag requiring that I/O completion for > this request is only signaled after the data has been committed to > non-volatile storage. > > Fix this by deferring the completion of overwrite bios, with the REQ_FUA > flag set, after the metadata has been committed. 
> > Signed-off-by: Nikos Tsironis <ntsiro...@arrikto.com> > --- > drivers/md/dm-thin.c | 55 > +++++++++++++++++++++++++++++++++++++++++++++++----- > 1 file changed, 50 insertions(+), 5 deletions(-) > > Changes in v2: > - Add missing bio_list_init() in pool_create() > > v1: https://www.redhat.com/archives/dm-devel/2019-February/msg00064.html > > diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c > index ca8af21bf644..e83b63608262 100644 > --- a/drivers/md/dm-thin.c > +++ b/drivers/md/dm-thin.c > @@ -257,6 +257,7 @@ struct pool { > > spinlock_t lock; > struct bio_list deferred_flush_bios; > + struct bio_list deferred_flush_completions; > struct list_head prepared_mappings; > struct list_head prepared_discards; > struct list_head prepared_discards_pt2; > @@ -956,6 +957,39 @@ static void process_prepared_mapping_fail(struct > dm_thin_new_mapping *m) > mempool_free(m, &m->tc->pool->mapping_pool); > } > > +static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio) > +{ > + struct pool *pool = tc->pool; > + unsigned long flags; > + > + /* > + * If the bio has the REQ_FUA flag set we must commit the metadata > + * before signaling its completion. > + */ > + if (!bio_triggers_commit(tc, bio)) { > + bio_endio(bio); > + return; > + } > + > + /* > + * Complete bio with an error if earlier I/O caused changes to the > + * metadata that can't be committed, e.g, due to I/O errors on the > + * metadata device. > + */ > + if (dm_thin_aborted_changes(tc->td)) { > + bio_io_error(bio); > + return; > + } > + > + /* > + * Batch together any bios that trigger commits and then issue a > + * single commit for them in process_deferred_bios(). 
> + */ > + spin_lock_irqsave(&pool->lock, flags); > + bio_list_add(&pool->deferred_flush_completions, bio); > + spin_unlock_irqrestore(&pool->lock, flags); > +} > + > static void process_prepared_mapping(struct dm_thin_new_mapping *m) > { > struct thin_c *tc = m->tc; > @@ -988,7 +1022,7 @@ static void process_prepared_mapping(struct > dm_thin_new_mapping *m) > */ > if (bio) { > inc_remap_and_issue_cell(tc, m->cell, m->data_block); > - bio_endio(bio); > + complete_overwrite_bio(tc, bio); > } else { > inc_all_io_entry(tc->pool, m->cell->holder); > remap_and_issue(tc, m->cell->holder, m->data_block); > @@ -2317,7 +2351,7 @@ static void process_deferred_bios(struct pool *pool) > { > unsigned long flags; > struct bio *bio; > - struct bio_list bios; > + struct bio_list bios, bio_completions; > struct thin_c *tc; > > tc = get_first_thin(pool); > @@ -2328,26 +2362,36 @@ static void process_deferred_bios(struct pool *pool) > } > > /* > - * If there are any deferred flush bios, we must commit > - * the metadata before issuing them. > + * If there are any deferred flush bios, we must commit the metadata > + * before issuing them or signaling their completion. 
> */ > bio_list_init(&bios); > + bio_list_init(&bio_completions); > + > spin_lock_irqsave(&pool->lock, flags); > bio_list_merge(&bios, &pool->deferred_flush_bios); > bio_list_init(&pool->deferred_flush_bios); > + > + bio_list_merge(&bio_completions, &pool->deferred_flush_completions); > + bio_list_init(&pool->deferred_flush_completions); > spin_unlock_irqrestore(&pool->lock, flags); > > - if (bio_list_empty(&bios) && > + if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) && > !(dm_pool_changed_this_transaction(pool->pmd) && > need_commit_due_to_time(pool))) > return; > > if (commit(pool)) { > + bio_list_merge(&bios, &bio_completions); > + > while ((bio = bio_list_pop(&bios))) > bio_io_error(bio); > return; > } > pool->last_commit_jiffies = jiffies; > > + while ((bio = bio_list_pop(&bio_completions))) > + bio_endio(bio); > + > while ((bio = bio_list_pop(&bios))) > generic_make_request(bio); > } > @@ -2954,6 +2998,7 @@ static struct pool *pool_create(struct mapped_device > *pool_md, > INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); > spin_lock_init(&pool->lock); > bio_list_init(&pool->deferred_flush_bios); > + bio_list_init(&pool->deferred_flush_completions); > INIT_LIST_HEAD(&pool->prepared_mappings); > INIT_LIST_HEAD(&pool->prepared_discards); > INIT_LIST_HEAD(&pool->prepared_discards_pt2); > -- > 2.11.0 > -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel