Hi, Filipe Manana > -----Original Message----- > From: Filipe Manana [mailto:fdman...@gmail.com] > Sent: Friday, October 23, 2015 11:17 PM > To: Jeff Mahoney <je...@suse.com> > Cc: Zhao Lei <zhao...@cn.fujitsu.com>; linux-btrfs@vger.kernel.org > Subject: Re: [PATCH] btrfs: Remove code for no-cow in scrub/replace > > On Fri, Oct 23, 2015 at 4:11 PM, Jeff Mahoney <je...@suse.com> wrote: > > -----BEGIN PGP SIGNED MESSAGE----- > > Hash: SHA1 > > > > On 10/23/15 4:03 AM, Zhao Lei wrote: > >> Since we set source bg to readonly in scrub/replace, we don't need to > >> consider confliction of no-cow write in scrub/replace operaion. > > > > What happens if there's a read failure? IIRC the initial purpose of > > this code was to correct read failures during scrub and device > > replacement by fetching the bad extent from another device if one is > > available. > > And we don't have xfstests, or any other automated test suite, to test those > code paths. > So completely useless to run xfstests in a loop for 5 days, especially the > generic > tests which never trigger scrub runs... > I completely agree with you.
Something that was not written in the patch's commit message: I also have a custom test, which includes scrub on a bad-block device: [root@ZLLINUX custom_tests]# ls -l total 36 -rwxr-xr-x 1 root root 15931 Oct 9 21:08 btrfs_maintance.sh -rwxr-xr-x 1 root root 2000 Apr 30 18:23 btrfs_out_of_space.sh -rwxr-xr-x 1 root root 5588 May 26 09:47 btrfs_replace_sumerr.sh -rwxr-xr-x 1 root root 1042 Sep 18 17:05 __loop_custom.sh -rwxr-xr-x 1 root root 939 Jul 14 18:15 xfstests.sh [root@ZLLINUX custom_tests]# # grep '()' btrfs_maintance.sh ... redundancy_func_test() scrub_func_test() replace_func_test() many_faildisk_test() [root@ZLLINUX custom_tests]# And this patch was also tested with the above script: # ./__loop_custom.sh ./btrfs_maintance.sh Loop 0 replace_func_test: raidtype=raid1 mkfs_opt= mount_opt=-o nodatacow max_disk= mkfs and mount: raid=raid1 dev_cnt=2 mkfs_opt= mount_opt=-o nodatacow: Writting some data: du: 17, writting 1M file du: 18, writting 2M file du: 20, writting 4M file du: 24, writting 8M file du: 32, writting 16M file reach enough space: 48% Start replace /dev/vdc -> /dev/vde OO:/dev/vdc OO:/dev/vdd #X:/dev/vde #O:/dev/vdc OO:/dev/vdd OO:/dev/vde Replace start in dmesg found Replace finish in dmesg found dmesg: OK file contents before remount: same dmesg: OK file contents after remount: same Check prune /dev/vdc #O:/dev/vdc OO:/dev/vdd OO:/dev/vde prune dsk /dev/vdc #X:/dev/vdc OO:/dev/vdd OO:/dev/vde dmesg: OK fs contents: same Check prune /dev/vdd #X:/dev/vdc OO:/dev/vdd OO:/dev/vde prune dsk /dev/vdd #X:/dev/vdc OX:/dev/vdd OO:/dev/vde dmesg: OK fs contents: same Start replace /dev/vde -> /dev/vdc #X:/dev/vdc OX:/dev/vdd OO:/dev/vde OO:/dev/vdc OX:/dev/vdd #O:/dev/vde Replace start in dmesg found Replace finish in dmesg found dmesg: OK file contents before remount: same .... I'll add the above script to xfstests when it is complete and stable. 
Thanks Zhaolei > > > > > See commit 0ef8e45158f (btrfs scrub: add fixup code for errors on > > nodatasum files) > > > > - -Jeff > > > >> This patch removes special code for no-cow mode in scrub/replace, > >> reduced 670 lines. > >> > >> Tested by continuous xfstests in 5 days, include generic and btrfs > >> groups with 10 mount options include nodatacow. > >> > >> Signed-off-by: Zhao Lei <zhao...@cn.fujitsu.com> --- > >> fs/btrfs/ctree.h | 1 - fs/btrfs/scrub.c | 669 > >> ------------------------------------------------------- 2 files > >> changed, 670 deletions(-) > >> > >> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index > >> 938efe3..3387509 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h > >> @@ -1688,7 +1688,6 @@ struct btrfs_fs_info { int > >> scrub_workers_refcnt; struct btrfs_workqueue *scrub_workers; struct > >> btrfs_workqueue *scrub_wr_completion_workers; - struct > >> btrfs_workqueue *scrub_nocow_workers; struct btrfs_workqueue > >> *scrub_parity_workers; > >> > >> #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY diff --git a/fs/btrfs/scrub.c > >> b/fs/btrfs/scrub.c index d64f557..6027679 > >> 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -205,32 > >> +205,6 @@ struct scrub_ctx { atomic_t refs; }; > >> > >> -struct scrub_fixup_nodatasum { - struct scrub_ctx *sctx; - > struct > >> btrfs_device *dev; - u64 logical; - struct > btrfs_root *root; - > >> struct btrfs_work work; - int mirror_num; -}; - > -struct > >> scrub_nocow_inode { - u64 inum; - u64 > offset; - u64 root; - > >> struct list_head list; -}; - -struct scrub_copy_nocow_ctx { - > >> struct scrub_ctx *sctx; - u64 > logical; - u64 len; - int > >> mirror_num; - u64 physical_for_dev_replace; - > struct list_head > >> inodes; - struct btrfs_work work; -}; - struct scrub_warning { > >> struct btrfs_path *path; u64 > extent_item_size; @@ -242,8 +216,6 > >> @@ struct scrub_warning { > >> > >> static void scrub_pending_bio_inc(struct scrub_ctx *sctx); static > >> void 
scrub_pending_bio_dec(struct scrub_ctx *sctx); -static void > >> scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); -static void > >> scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); static int > >> scrub_handle_errored_block(struct scrub_block *sblock_to_check); > >> static int scrub_setup_recheck_block(struct scrub_block > >> *original_sblock, struct scrub_block *sblocks_for_recheck); @@ > >> -298,13 +270,6 @@ static int scrub_add_page_to_wr_bio(struct > >> scrub_ctx *sctx, static void scrub_wr_submit(struct scrub_ctx *sctx); > >> static void scrub_wr_bio_end_io(struct bio *bio, int err); static > >> void scrub_wr_bio_end_io_worker(struct btrfs_work *work); -static int > >> write_page_nocow(struct scrub_ctx *sctx, - > u64 > >> physical_for_dev_replace, struct page *page); -static int > >> copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, - struct > >> scrub_copy_nocow_ctx *ctx); -static int copy_nocow_pages(struct > >> scrub_ctx *sctx, u64 logical, u64 len, - int mirror_num, u64 > >> physical_for_dev_replace); -static void > >> copy_nocow_pages_worker(struct btrfs_work *work); static void > >> __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void > >> scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void > >> scrub_put_ctx(struct scrub_ctx *sctx); @@ -355,60 +320,6 @@ static > >> void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) > >> scrub_pause_off(fs_info); } > >> > >> -/* - * used for workers that require transaction commits (i.e., for > >> the - * NOCOW case) - */ -static void > >> scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) -{ - struct > >> btrfs_fs_info *fs_info = sctx->dev_root->fs_info; - - > >> atomic_inc(&sctx->refs); - /* - * increment scrubs_running to > >> prevent cancel requests from - * completing as long as a worker > is > >> running. 
we must also - * increment scrubs_paused to prevent > >> deadlocking on pause - * requests used for transactions commits > >> (as the worker uses a - * transaction context). it is safe to > >> regard the worker - * as paused for all matters practical. > >> effectively, we only - * avoid cancellation requests from > >> completing. - */ - mutex_lock(&fs_info->scrub_lock); - > >> atomic_inc(&fs_info->scrubs_running); - > >> atomic_inc(&fs_info->scrubs_paused); - > >> mutex_unlock(&fs_info->scrub_lock); - - /* - * check if > >> @scrubs_running=@scrubs_paused condition - * inside wait_event() > >> is not an atomic operation. - * which means we may inc/dec > >> @scrub_running/paused - * at any time. Let's wake up > >> @scrub_pause_wait as - * much as we can to let commit > transaction > >> blocked less. - */ - wake_up(&fs_info->scrub_pause_wait); - - > >> atomic_inc(&sctx->workers_pending); -} - -/* used for workers that > >> require transaction commits */ -static void > >> scrub_pending_trans_workers_dec(struct scrub_ctx *sctx) -{ - struct > >> btrfs_fs_info *fs_info = sctx->dev_root->fs_info; - - /* - * see > >> scrub_pending_trans_workers_inc() why we're pretending - * to be > >> paused in the scrub counters - */ - > >> mutex_lock(&fs_info->scrub_lock); - > >> atomic_dec(&fs_info->scrubs_running); - > >> atomic_dec(&fs_info->scrubs_paused); - > >> mutex_unlock(&fs_info->scrub_lock); - > >> atomic_dec(&sctx->workers_pending); - > >> wake_up(&fs_info->scrub_pause_wait); - > wake_up(&sctx->list_wait); - > >> scrub_put_ctx(sctx); -} - static void scrub_free_csums(struct > >> scrub_ctx *sctx) { while (!list_empty(&sctx->csum_list)) { @@ > >> -686,194 +597,6 @@ out: btrfs_free_path(path); } > >> > >> -static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, > >> void *fixup_ctx) -{ - struct page *page = NULL; - unsigned long > >> index; - struct scrub_fixup_nodatasum *fixup = fixup_ctx; - int > >> ret; - int corrected = 0; - struct btrfs_key key; - struct inode > >> 
*inode = NULL; - struct btrfs_fs_info *fs_info; - u64 end = > offset > >> + PAGE_SIZE - 1; - struct btrfs_root *local_root; - int > >> srcu_index; - - key.objectid = root; - key.type = > >> BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - fs_info = > >> fixup->root->fs_info; - srcu_index = > >> srcu_read_lock(&fs_info->subvol_srcu); - - local_root = > >> btrfs_read_fs_root_no_name(fs_info, &key); - if > >> (IS_ERR(local_root)) { - > srcu_read_unlock(&fs_info->subvol_srcu, > >> srcu_index); - return PTR_ERR(local_root); - } - - > key.type = > >> BTRFS_INODE_ITEM_KEY; - key.objectid = inum; - key.offset = 0; - > >> inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); - > >> srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - if > >> (IS_ERR(inode)) - return PTR_ERR(inode); - - index = > offset >> > >> PAGE_CACHE_SHIFT; - - page = find_or_create_page(inode->i_mapping, > >> index, GFP_NOFS); - if (!page) { - ret = -ENOMEM; - > goto out; - > >> } - - if (PageUptodate(page)) { - if (PageDirty(page)) { - > /* - > >> * we need to write the data to the defect sector. the - > * data > >> that was in that sector is not in memory, - * > because the page > >> was modified. we must not write the - * modified page > to that > >> sector. - * - * TODO: > what could be done here: wait for the > >> delalloc - * runner to write out that page > (might involve > >> - * COW) and see whether the sector is > still - * > >> referenced afterwards. - * - > * For the meantime, we'll treat > >> this error - * incorrectable, although there is a > chance that a > >> - * later scrub will find the bad sector again and > that - * > >> there's no dirty page in memory, then. - */ - > ret = -EIO; - > >> goto out; - } - ret = repair_io_failure(inode, > offset, > >> PAGE_SIZE, - fixup->logical, page, - > offset - > >> page_offset(page), - > fixup->mirror_num); - unlock_page(page); > >> - corrected = !ret; - } else { - /* - > * we need to get good > >> data first. 
the general readpage path - * will call > >> repair_io_failure for us, we just have to make - * sure we > read > >> the bad mirror. - */ - ret = > >> set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, - > >> EXTENT_DAMAGED, GFP_NOFS); - if (ret) { - > /* set_extent_bits > >> should give proper error */ - WARN_ON(ret > 0); - > if (ret > 0) > >> - ret = -EFAULT; - > goto out; - } - - ret = > >> extent_read_full_page(&BTRFS_I(inode)->io_tree, page, - > >> btrfs_get_extent, - > fixup->mirror_num); - > >> wait_on_page_locked(page); - - corrected = > >> !test_range_bit(&BTRFS_I(inode)->io_tree, offset, - > end, > >> EXTENT_DAMAGED, 0, NULL); - if (!corrected) - > >> clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, - > >> EXTENT_DAMAGED, GFP_NOFS); - } - -out: - if (page) - > >> put_page(page); - - iput(inode); - - if (ret < 0) - > >> return > ret; - > >> - if (ret == 0 && corrected) { - /* - * we only > need to call > >> readpage for one of the inodes belonging - * to this extent. > so > >> make iterate_extent_inodes stop - */ - return > 1; - } - - return > >> -EIO; -} - -static void scrub_fixup_nodatasum(struct btrfs_work > >> *work) -{ - int ret; - struct scrub_fixup_nodatasum *fixup; - > >> struct scrub_ctx *sctx; - struct btrfs_trans_handle *trans = NULL; > >> - struct btrfs_path *path; - int uncorrectable = 0; - - fixup > = > >> container_of(work, struct scrub_fixup_nodatasum, work); - sctx = > >> fixup->sctx; - - path = btrfs_alloc_path(); - if (!path) { - > >> spin_lock(&sctx->stat_lock); - > ++sctx->stat.malloc_errors; - > >> spin_unlock(&sctx->stat_lock); - uncorrectable = 1; - > goto out; - > >> } - - trans = btrfs_join_transaction(fixup->root); - if > >> (IS_ERR(trans)) { - uncorrectable = 1; - goto out; > - } - - /* - > >> * the idea is to trigger a regular read through the standard path. > >> we - * read a page from the (failed) logical address by specifying > >> the - * corresponding copynum of the failed sector. 
thus, that > >> readpage is - * expected to fail. - * that is the point where > >> on-the-fly error correction will kick in - * (once it's finished) > >> and rewrite the failed sector if a good copy - * can be found. - > >> */ - ret = iterate_inodes_from_logical(fixup->logical, > >> fixup->root->fs_info, - > path, scrub_fixup_readpage, - > >> fixup); - if (ret < 0) { - uncorrectable = 1; - > goto out; - } - > >> WARN_ON(ret != 1); - - spin_lock(&sctx->stat_lock); - > >> ++sctx->stat.corrected_errors; - spin_unlock(&sctx->stat_lock); - > >> -out: - if (trans && !IS_ERR(trans)) - > >> btrfs_end_transaction(trans, fixup->root); - if (uncorrectable) { - > >> spin_lock(&sctx->stat_lock); - > ++sctx->stat.uncorrectable_errors; > >> - spin_unlock(&sctx->stat_lock); - > btrfs_dev_replace_stats_inc( - > >> &sctx->dev_root->fs_info->dev_replace. - > >> num_uncorrectable_read_errors); - > >> printk_ratelimited_in_rcu(KERN_ERR "BTRFS: " - > "unable to > >> fixup (nodatasum) error at logical %llu on dev %s\n", - > >> fixup->logical, rcu_str_deref(fixup->dev->name)); - } - - > >> btrfs_free_path(path); - kfree(fixup); - - > >> scrub_pending_trans_workers_dec(sctx); -} - static inline void > >> scrub_get_recover(struct scrub_recover *recover) { > >> atomic_inc(&recover->refs); @@ -940,11 +663,6 @@ static int > >> scrub_handle_errored_block(struct scrub_block *sblock_to_check) csum > >> = sblock_to_check->pagev[0]->csum; dev = > >> sblock_to_check->pagev[0]->dev; > >> > >> - if (sctx->is_dev_replace && !is_metadata && !have_csum) { - > >> sblocks_for_recheck = NULL; - goto nodatasum_case; - } - /* * > read > >> all mirrors one after the other. 
This includes to * re-read the > >> extent or metadata block that failed (that was @@ -1058,36 +776,6 @@ > >> static int scrub_handle_errored_block(struct scrub_block > >> *sblock_to_check) goto out; } > >> > >> - if (!is_metadata && !have_csum) { - struct > scrub_fixup_nodatasum > >> *fixup_nodatasum; - - WARN_ON(sctx->is_dev_replace); - > >> -nodatasum_case: - - /* - * !is_metadata > and !have_csum, this > >> means that the data - * might not be COW'ed, that it might be > >> modified - * concurrently. The general strategy to work on > the - > >> * commit root does not help in the case when COW is not - > * > >> used. - */ - fixup_nodatasum = > kzalloc(sizeof(*fixup_nodatasum), > >> GFP_NOFS); - if (!fixup_nodatasum) - goto > >> did_not_correct_error; - fixup_nodatasum->sctx = sctx; - > >> fixup_nodatasum->dev = dev; - fixup_nodatasum->logical = > logical; > >> - fixup_nodatasum->root = fs_info->extent_root; - > >> fixup_nodatasum->mirror_num = failed_mirror_index + 1; - > >> scrub_pending_trans_workers_inc(sctx); - > >> btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper, - > >> scrub_fixup_nodatasum, NULL, NULL); - > >> btrfs_queue_work(fs_info->scrub_workers, - > >> &fixup_nodatasum->work); - goto out; - } - /* * now > build and > >> submit the bios for the other mirrors, check * checksums. @@ > >> -2575,23 +2263,9 @@ static int scrub_extent(struct scrub_ctx *sctx, > >> u64 logical, u64 len, while (len) { u64 l = min_t(u64, len, > >> blocksize); int have_csum = 0; - - if (flags & > >> BTRFS_EXTENT_FLAG_DATA) { - /* push csums to sbio > */ - > >> have_csum = scrub_find_csum(sctx, logical, l, csum); - > if > >> (have_csum == 0) - ++sctx->stat.no_csum; > - if > >> (sctx->is_dev_replace && !have_csum) { - > ret = > >> copy_nocow_pages(sctx, logical, l, - > mirror_num, - > >> physical_for_dev_replace); - goto > behind_scrub_pages; - } - > >> } ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen, > >> mirror_num, have_csum ? 
csum : NULL, 0, physical_for_dev_replace); > >> -behind_scrub_pages: if (ret) return ret; len -= l; @@ -3762,10 > >> +3436,6 @@ static noinline_for_stack int scrub_workers_get(struct > >> btrfs_fs_info *fs_info, if (!fs_info->scrub_wr_completion_workers) > >> goto fail_scrub_wr_completion_workers; > >> > >> - fs_info->scrub_nocow_workers = - > >> btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); - if > >> (!fs_info->scrub_nocow_workers) - goto > fail_scrub_nocow_workers; > >> fs_info->scrub_parity_workers = > >> btrfs_alloc_workqueue("btrfs-scrubparity", flags, max_active, 2); @@ > >> -3776,8 +3446,6 @@ static noinline_for_stack int > >> scrub_workers_get(struct btrfs_fs_info *fs_info, return 0; > >> > >> fail_scrub_parity_workers: - > >> btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); > >> -fail_scrub_nocow_workers: > >> btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); > >> fail_scrub_wr_completion_workers: > >> btrfs_destroy_workqueue(fs_info->scrub_workers); @@ -3790,7 +3458,6 > >> @@ static noinline_for_stack void scrub_workers_put(struct > >> btrfs_fs_info *fs_info) if (--fs_info->scrub_workers_refcnt == 0) { > >> btrfs_destroy_workqueue(fs_info->scrub_workers); > >> btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); - > >> btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); > >> btrfs_destroy_workqueue(fs_info->scrub_parity_workers); } > >> WARN_ON(fs_info->scrub_workers_refcnt < 0); @@ -4081,339 +3748,3 > @@ > >> static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx) > >> mutex_unlock(&wr_ctx->wr_lock); } > >> > >> -static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, > >> u64 len, - int mirror_num, u64 > physical_for_dev_replace) -{ - > >> struct scrub_copy_nocow_ctx *nocow_ctx; - struct btrfs_fs_info > >> *fs_info = sctx->dev_root->fs_info; - - nocow_ctx = > >> kzalloc(sizeof(*nocow_ctx), GFP_NOFS); - if (!nocow_ctx) { - > >> spin_lock(&sctx->stat_lock); - > sctx->stat.malloc_errors++; - > >> 
spin_unlock(&sctx->stat_lock); - return -ENOMEM; > - } - - > >> scrub_pending_trans_workers_inc(sctx); - - nocow_ctx->sctx = sctx; > >> - nocow_ctx->logical = logical; - nocow_ctx->len = len; - > >> nocow_ctx->mirror_num = mirror_num; - > >> nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; - > >> btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper, - > >> copy_nocow_pages_worker, NULL, NULL); - > >> INIT_LIST_HEAD(&nocow_ctx->inodes); - > >> btrfs_queue_work(fs_info->scrub_nocow_workers, - > >> &nocow_ctx->work); - - return 0; -} - -static int > >> record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx) -{ > >> - struct scrub_copy_nocow_ctx *nocow_ctx = ctx; - struct > >> scrub_nocow_inode *nocow_inode; - - nocow_inode = > >> kzalloc(sizeof(*nocow_inode), GFP_NOFS); - if (!nocow_inode) - > >> return -ENOMEM; - nocow_inode->inum = inum; - > nocow_inode->offset = > >> offset; - nocow_inode->root = root; - > >> list_add_tail(&nocow_inode->list, &nocow_ctx->inodes); - return 0; > >> -} - -#define COPY_COMPLETE 1 - -static void > >> copy_nocow_pages_worker(struct btrfs_work *work) -{ - struct > >> scrub_copy_nocow_ctx *nocow_ctx = - container_of(work, > struct > >> scrub_copy_nocow_ctx, work); - struct scrub_ctx *sctx = > >> nocow_ctx->sctx; - u64 logical = nocow_ctx->logical; - u64 len = > >> nocow_ctx->len; - int mirror_num = nocow_ctx->mirror_num; - > u64 > >> physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; - > >> int ret; - struct btrfs_trans_handle *trans = NULL; - struct > >> btrfs_fs_info *fs_info; - struct btrfs_path *path; - struct > >> btrfs_root *root; - int not_written = 0; - - fs_info = > >> sctx->dev_root->fs_info; - root = fs_info->extent_root; - - path > = > >> btrfs_alloc_path(); - if (!path) { - spin_lock(&sctx->stat_lock); > >> - sctx->stat.malloc_errors++; - > spin_unlock(&sctx->stat_lock); - > >> not_written = 1; - goto out; - } - - trans = > >> btrfs_join_transaction(root); - if (IS_ERR(trans)) { - > 
not_written > >> = 1; - goto out; - } - - ret = > >> iterate_inodes_from_logical(logical, fs_info, path, - > >> record_inode_for_nocow, nocow_ctx); - if (ret != 0 && ret != > >> -ENOENT) { - btrfs_warn(fs_info, "iterate_inodes_from_logical() > >> failed: log %llu, " - "phys %llu, len %llu, mir %u, ret > >> %d", > - > >> logical, physical_for_dev_replace, len, mirror_num, - > ret); - > >> not_written = 1; - goto out; - } - - > btrfs_end_transaction(trans, > >> root); - trans = NULL; - while (!list_empty(&nocow_ctx->inodes)) { > >> - struct scrub_nocow_inode *entry; - entry > = > >> list_first_entry(&nocow_ctx->inodes, - > struct > >> scrub_nocow_inode, - list); - > list_del_init(&entry->list); - > >> ret = copy_nocow_pages_for_inode(entry->inum, entry->offset, - > >> entry->root, nocow_ctx); - kfree(entry); - if (ret == > >> COPY_COMPLETE) { - ret = 0; - > break; - } else if (ret) { - > >> break; - } - } -out: - while > (!list_empty(&nocow_ctx->inodes)) { - > >> struct scrub_nocow_inode *entry; - entry = > >> list_first_entry(&nocow_ctx->inodes, - > struct > >> scrub_nocow_inode, - list); - > list_del_init(&entry->list); - > >> kfree(entry); - } - if (trans && !IS_ERR(trans)) - > >> btrfs_end_transaction(trans, root); - if (not_written) - > >> btrfs_dev_replace_stats_inc(&fs_info->dev_replace. 
- > >> num_uncorrectable_read_errors); - - btrfs_free_path(path); - > >> kfree(nocow_ctx); - - scrub_pending_trans_workers_dec(sctx); -} - > >> -static int check_extent_to_block(struct inode *inode, u64 start, > >> u64 len, - u64 logical) -{ - struct > extent_state *cached_state > >> = NULL; - struct btrfs_ordered_extent *ordered; - struct > >> extent_io_tree *io_tree; - struct extent_map *em; - u64 > lockstart = > >> start, lockend = start + len - 1; - int ret = 0; - - io_tree = > >> &BTRFS_I(inode)->io_tree; - - lock_extent_bits(io_tree, lockstart, > >> lockend, 0, &cached_state); - ordered = > >> btrfs_lookup_ordered_range(inode, lockstart, len); - if (ordered) > >> { - btrfs_put_ordered_extent(ordered); - ret = 1; > - goto > >> out_unlock; - } - - em = btrfs_get_extent(inode, NULL, 0, start, > >> len, 0); - if (IS_ERR(em)) { - ret = PTR_ERR(em); - > goto > >> out_unlock; - } - - /* - * This extent does not actually cover the > >> logical extent anymore, - * move on to the next inode. 
- */ - if > >> (em->block_start > logical || - em->block_start + > em->block_len > >> < logical + len) { - free_extent_map(em); - ret = 1; - > goto > >> out_unlock; - } - free_extent_map(em); - -out_unlock: - > >> unlock_extent_cached(io_tree, lockstart, lockend, &cached_state, - > >> GFP_NOFS); - return ret; -} - -static int > >> copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, - struct > >> scrub_copy_nocow_ctx *nocow_ctx) -{ - struct btrfs_fs_info > >> *fs_info = nocow_ctx->sctx->dev_root->fs_info; - struct btrfs_key > >> key; - struct inode *inode; - struct page *page; - struct > >> btrfs_root *local_root; - struct extent_io_tree *io_tree; - u64 > >> physical_for_dev_replace; - u64 nocow_ctx_logical; - u64 len = > >> nocow_ctx->len; - unsigned long index; - int srcu_index; - int > ret > >> = 0; - int err = 0; - - key.objectid = root; - key.type = > >> BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - > srcu_index = > >> srcu_read_lock(&fs_info->subvol_srcu); - - local_root = > >> btrfs_read_fs_root_no_name(fs_info, &key); - if > >> (IS_ERR(local_root)) { - > srcu_read_unlock(&fs_info->subvol_srcu, > >> srcu_index); - return PTR_ERR(local_root); - } - - > key.type = > >> BTRFS_INODE_ITEM_KEY; - key.objectid = inum; - key.offset = 0; - > >> inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); - > >> srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - if > >> (IS_ERR(inode)) - return PTR_ERR(inode); - - /* Avoid > >> truncate/dio/punch hole.. */ - mutex_lock(&inode->i_mutex); - > >> inode_dio_wait(inode); - - physical_for_dev_replace = > >> nocow_ctx->physical_for_dev_replace; - io_tree = > >> &BTRFS_I(inode)->io_tree; - nocow_ctx_logical = > >> nocow_ctx->logical; - - ret = check_extent_to_block(inode, offset, > >> len, nocow_ctx_logical); - if (ret) { - ret = ret > 0 ? 
0 : > >> ret; - > >> goto out; - } - - while (len >= PAGE_CACHE_SIZE) { - > index = > >> offset >> PAGE_CACHE_SHIFT; -again: - page = > >> find_or_create_page(inode->i_mapping, index, GFP_NOFS); - > if > >> (!page) { - btrfs_err(fs_info, "find_or_create_page() > failed"); - > >> ret = -ENOMEM; - goto out; - } - - > if (PageUptodate(page)) { - > >> if (PageDirty(page)) - goto next_page; > - } else { - > >> ClearPageError(page); - err = > extent_read_full_page(io_tree, > >> page, - > btrfs_get_extent, - > >> nocow_ctx->mirror_num); - if (err) { - > ret = err; - goto > >> next_page; - } - - > lock_page(page); - /* - * If > the page > >> has been remove from the page cache, - * the > data on it is > >> meaningless, because it may be - * old one, the > new data may be > >> written into the new - * page in the page > cache. - */ - if > >> (page->mapping != inode->i_mapping) { - > unlock_page(page); - > >> page_cache_release(page); - goto again; > - } - if > >> (!PageUptodate(page)) { - ret = -EIO; - > goto next_page; - > >> } - } - - ret = check_extent_to_block(inode, offset, > len, - > >> nocow_ctx_logical); - if (ret) { - ret = ret > > 0 ? 
0 : ret; - > >> goto next_page; - } - - err = > write_page_nocow(nocow_ctx->sctx, - > >> physical_for_dev_replace, page); - if (err) - > ret = err; > >> -next_page: - unlock_page(page); - > page_cache_release(page); - - > >> if (ret) - break; - - offset += > PAGE_CACHE_SIZE; - > >> physical_for_dev_replace += PAGE_CACHE_SIZE; - > nocow_ctx_logical > >> += PAGE_CACHE_SIZE; - len -= PAGE_CACHE_SIZE; - } - > ret = > >> COPY_COMPLETE; -out: - mutex_unlock(&inode->i_mutex); - > >> iput(inode); - return ret; -} - -static int write_page_nocow(struct > >> scrub_ctx *sctx, - u64 > physical_for_dev_replace, struct page > >> *page) -{ - struct bio *bio; - struct btrfs_device *dev; - int > >> ret; - - dev = sctx->wr_ctx.tgtdev; - if (!dev) - > return -EIO; - > >> if (!dev->bdev) { - printk_ratelimited(KERN_WARNING - > "BTRFS: > >> scrub write_page_nocow(bdev == NULL) is unexpected!\n"); - > return > >> -EIO; - } - bio = btrfs_io_bio_alloc(GFP_NOFS, 1); - if > (!bio) { - > >> spin_lock(&sctx->stat_lock); - > sctx->stat.malloc_errors++; - > >> spin_unlock(&sctx->stat_lock); - return -ENOMEM; > - } - > >> bio->bi_iter.bi_size = 0; - bio->bi_iter.bi_sector = > >> physical_for_dev_replace >> 9; - bio->bi_bdev = dev->bdev; - ret > = > >> bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); - if (ret != > >> PAGE_CACHE_SIZE) { -leave_with_eio: - bio_put(bio); - > >> btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); - > >> return -EIO; - } - - if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) > >> - goto leave_with_eio; - - bio_put(bio); - return 0; -} > >> > > > > > > - -- > > Jeff Mahoney > > SUSE Labs > > -----BEGIN PGP SIGNATURE----- > > Version: GnuPG/MacGPG2 v2.0.19 (Darwin) > > > > > iQIcBAEBAgAGBQJWKk4SAAoJEB57S2MheeWyVwkP/0jftdMvTiXNe+7pzcnyoim > A > > > lYDb0pXTE+/F7utsRMFN1z/NHMLJaTzfmeBVffG+FlEPKakzsXqfTKuQ1MO3F+NK > > m8djSfGd88rQFfIU3kdhw3g+3LIp4JG10/IMKjESS7ra4zFymIOrHAuK9JxsqCO5 > > VQ4f6rgqpeXGfi3F3huS6JVhDJJrXQcNCgbwUbNajzM052AsOsxfxUj7cLERgJ9Q > > > 
72GyA+cNstMMoHb5XqR3P+LlYdBJmQTQBnYSMP55WY+jZy6Eyt969Tkwr+T+S7 > j1 > > s6PePS7ak8sZNpXhLxMtipIPF7b764iMTZrauf4uqa7woO4cAq5b/El+CvYrJA4B > > fgxHRyCAJVdJrmdJuFP/ddY4OzzWByXyoT2cvv8+eysAUlRxrYB4/lE5FfDxUhC0 > > > wwUDruFbk27pYdCZNxnLkJ8oMaJLaYjcNSu5jgD3omNmWOSa6ZN17Ha7VPtLL > A4W > > jUZXy/k4J4wKTR+/BfpsE52wcSEgFT6C3n+2A+y+/tS10fTw2UgTW3SwiYZlmYHi > > > amDXa8NcbrMNaZql1ykzkIpKHfkkNNScZTI8u4GWyVCs30npf8YOOmBNVRshp5 > WQ > > > BnBPqgHcJFw+5autuBrtw58Z1j8IVlGdASt1d/VcBmgMJPCvIm/U/YfBHXslcUHU > > MNOl/0NrfnfAdRZbAFeL > > =wNWX > > -----END PGP SIGNATURE----- > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" > > in the body of a message to majord...@vger.kernel.org More majordomo > > info at http://vger.kernel.org/majordomo-info.html > > > > -- > Filipe David Manana, > > "Reasonable men adapt themselves to the world. > Unreasonable men adapt the world to themselves. > That's why all progress depends on unreasonable men." -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html