On Tue, Nov 18, 2014 at 05:19:41PM -0500, Josef Bacik wrote:
> Liu Bo pointed out that my previous fix would lose the generation update in 
> the
> scenario I described.  It is actually much worse than that, we could lose the
> entire extent if we lose power right after the transaction commits.  Consider
> the following
> 
> write extent 0-4k
> log extent in log tree
> commit transaction
>       < power fail happens here
> ordered extent completes
> 
> We would lose the 0-4k extent because it hasn't updated the actual fs tree, 
> and
> the transaction commit will reset the log so it isn't replayed.  If we lose
> power before the transaction commit we are save, otherwise we are not.
> 
> Fix this by keeping track of all extents we logged in this transaction.  Then
> when we go to commit the transaction make sure we wait for all of those 
> ordered
> extents to complete before proceeding.  This will make sure that if we lose
> power after the transaction commit we still have our data.  This also fixes 
> the
> problem of the improperly updated extent generation.  Thanks,

This looks saner.

Reviewed-by: Liu Bo <bo.li....@oracle.com>

thanks,
-liubo

> 
> cc: sta...@vger.kernel.org
> Signed-off-by: Josef Bacik <jba...@fb.com>
> ---
>  fs/btrfs/ordered-data.c |  6 ++++--
>  fs/btrfs/ordered-data.h |  6 +++++-
>  fs/btrfs/transaction.c  | 33 +++++++++++++++++++++++++++++++++
>  fs/btrfs/transaction.h  |  2 ++
>  fs/btrfs/tree-log.c     |  6 +++---
>  5 files changed, 47 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
> index ac734ec..7c2dd7a 100644
> --- a/fs/btrfs/ordered-data.c
> +++ b/fs/btrfs/ordered-data.c
> @@ -220,6 +220,7 @@ static int __btrfs_add_ordered_extent(struct inode 
> *inode, u64 file_offset,
>       INIT_LIST_HEAD(&entry->work_list);
>       init_completion(&entry->completion);
>       INIT_LIST_HEAD(&entry->log_list);
> +     INIT_LIST_HEAD(&entry->trans_list);
>  
>       trace_btrfs_ordered_extent_add(inode, entry);
>  
> @@ -472,7 +473,8 @@ void btrfs_submit_logged_extents(struct list_head 
> *logged_list,
>       spin_unlock_irq(&log->log_extents_lock[index]);
>  }
>  
> -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
> +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
> +                            struct btrfs_root *log, u64 transid)
>  {
>       struct btrfs_ordered_extent *ordered;
>       int index = transid % 2;
> @@ -497,7 +499,7 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, 
> u64 transid)
>               wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
>                                                  &ordered->flags));
>  
> -             btrfs_put_ordered_extent(ordered);
> +             list_add_tail(&ordered->trans_list, &trans->ordered);
>               spin_lock_irq(&log->log_extents_lock[index]);
>       }
>       spin_unlock_irq(&log->log_extents_lock[index]);
> diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
> index d81a274..171a841 100644
> --- a/fs/btrfs/ordered-data.h
> +++ b/fs/btrfs/ordered-data.h
> @@ -121,6 +121,9 @@ struct btrfs_ordered_extent {
>       /* If we need to wait on this to be done */
>       struct list_head log_list;
>  
> +     /* If the transaction needs to wait on this ordered extent */
> +     struct list_head trans_list;
> +
>       /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
>       wait_queue_head_t wait;
>  
> @@ -197,7 +200,8 @@ void btrfs_get_logged_extents(struct inode *inode,
>  void btrfs_put_logged_extents(struct list_head *logged_list);
>  void btrfs_submit_logged_extents(struct list_head *logged_list,
>                                struct btrfs_root *log);
> -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
> +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
> +                            struct btrfs_root *log, u64 transid);
>  void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
>  int __init ordered_data_init(void);
>  void ordered_data_exit(void);
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index dcaae36..63c6d05 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -220,6 +220,7 @@ loop:
>       INIT_LIST_HEAD(&cur_trans->pending_snapshots);
>       INIT_LIST_HEAD(&cur_trans->pending_chunks);
>       INIT_LIST_HEAD(&cur_trans->switch_commits);
> +     INIT_LIST_HEAD(&cur_trans->pending_ordered);
>       list_add_tail(&cur_trans->list, &fs_info->trans_list);
>       extent_io_tree_init(&cur_trans->dirty_pages,
>                            fs_info->btree_inode->i_mapping);
> @@ -488,6 +489,7 @@ again:
>       h->sync = false;
>       INIT_LIST_HEAD(&h->qgroup_ref_list);
>       INIT_LIST_HEAD(&h->new_bgs);
> +     INIT_LIST_HEAD(&h->ordered);
>  
>       smp_mb();
>       if (cur_trans->state >= TRANS_STATE_BLOCKED &&
> @@ -719,6 +721,12 @@ static int __btrfs_end_transaction(struct 
> btrfs_trans_handle *trans,
>       if (!list_empty(&trans->new_bgs))
>               btrfs_create_pending_block_groups(trans, root);
>  
> +     if (!list_empty(&trans->ordered)) {
> +             spin_lock(&info->trans_lock);
> +             list_splice(&trans->ordered, &cur_trans->pending_ordered);
> +             spin_unlock(&info->trans_lock);
> +     }
> +
>       trans->delayed_ref_updates = 0;
>       if (!trans->sync) {
>               must_run_delayed_refs =
> @@ -1652,6 +1660,28 @@ static inline void btrfs_wait_delalloc_flush(struct 
> btrfs_fs_info *fs_info)
>               btrfs_wait_ordered_roots(fs_info, -1);
>  }
>  
> +static inline void
> +btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
> +                        struct btrfs_fs_info *fs_info)
> +{
> +     struct btrfs_ordered_extent *ordered;
> +
> +     spin_lock(&fs_info->trans_lock);
> +     while (!list_empty(&cur_trans->pending_ordered)) {
> +             ordered = list_first_entry(&cur_trans->pending_ordered,
> +                                        struct btrfs_ordered_extent,
> +                                        trans_list);
> +             list_del_init(&ordered->trans_list);
> +             spin_unlock(&fs_info->trans_lock);
> +
> +             wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
> +                                                &ordered->flags));
> +             btrfs_put_ordered_extent(ordered);
> +             spin_lock(&fs_info->trans_lock);
> +     }
> +     spin_unlock(&fs_info->trans_lock);
> +}
> +
>  int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
>                            struct btrfs_root *root)
>  {
> @@ -1702,6 +1732,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle 
> *trans,
>       }
>  
>       spin_lock(&root->fs_info->trans_lock);
> +     list_splice(&trans->ordered, &cur_trans->pending_ordered);
>       if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
>               spin_unlock(&root->fs_info->trans_lock);
>               atomic_inc(&cur_trans->use_count);
> @@ -1754,6 +1785,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle 
> *trans,
>  
>       btrfs_wait_delalloc_flush(root->fs_info);
>  
> +     btrfs_wait_pending_ordered(cur_trans, root->fs_info);
> +
>       btrfs_scrub_pause(root);
>       /*
>        * Ok now we need to make sure to block out any other joins while we
> diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
> index d8f40e1..1ba9c3e 100644
> --- a/fs/btrfs/transaction.h
> +++ b/fs/btrfs/transaction.h
> @@ -56,6 +56,7 @@ struct btrfs_transaction {
>       wait_queue_head_t commit_wait;
>       struct list_head pending_snapshots;
>       struct list_head pending_chunks;
> +     struct list_head pending_ordered;
>       struct list_head switch_commits;
>       struct btrfs_delayed_ref_root delayed_refs;
>       int aborted;
> @@ -105,6 +106,7 @@ struct btrfs_trans_handle {
>        */
>       struct btrfs_root *root;
>       struct seq_list delayed_ref_elem;
> +     struct list_head ordered;
>       struct list_head qgroup_ref_list;
>       struct list_head new_bgs;
>  };
> diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
> index 70f99b1..337e4bc 100644
> --- a/fs/btrfs/tree-log.c
> +++ b/fs/btrfs/tree-log.c
> @@ -2600,7 +2600,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
>       if (atomic_read(&log_root_tree->log_commit[index2])) {
>               blk_finish_plug(&plug);
>               btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
> -             btrfs_wait_logged_extents(log, log_transid);
> +             btrfs_wait_logged_extents(trans, log, log_transid);
>               wait_log_commit(trans, log_root_tree,
>                               root_log_ctx.log_transid);
>               mutex_unlock(&log_root_tree->log_mutex);
> @@ -2645,7 +2645,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
>       btrfs_wait_marked_extents(log_root_tree,
>                                 &log_root_tree->dirty_log_pages,
>                                 EXTENT_NEW | EXTENT_DIRTY);
> -     btrfs_wait_logged_extents(log, log_transid);
> +     btrfs_wait_logged_extents(trans, log, log_transid);
>  
>       btrfs_set_super_log_root(root->fs_info->super_for_commit,
>                               log_root_tree->node->start);
> @@ -3766,7 +3766,7 @@ static int log_one_extent(struct btrfs_trans_handle 
> *trans,
>       fi = btrfs_item_ptr(leaf, path->slots[0],
>                           struct btrfs_file_extent_item);
>  
> -     btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
> +     btrfs_set_token_file_extent_generation(leaf, fi, trans->transid,
>                                              &token);
>       if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
>               btrfs_set_token_file_extent_type(leaf, fi,
> -- 
> 1.8.3.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to