[PATCH] Btrfs: refactor btrfs_extent_same() slightly
From: Omar SandovalThis was originally a prep patch for changing the behavior on len=0, but we went another direction with that. This still makes the function slightly easier to follow. Reviewed-by: Qu Wenruo Signed-off-by: Omar Sandoval --- Qu thought this would still be a worthwhile cleanup. I'm fine either way. Applies to Dave's for-next branch. fs/btrfs/ioctl.c | 33 - 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bf7227d43b5d..3542e3d2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3127,26 +3127,27 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, int ret; u64 len = olen; struct cmp_pages cmp; - int same_inode = 0; + bool same_inode = (src == dst); u64 same_lock_start = 0; u64 same_lock_len = 0; - if (src == dst) - same_inode = 1; - if (len == 0) return 0; - if (same_inode) { + if (same_inode) inode_lock(src); + else + btrfs_double_inode_lock(src, dst); - ret = extent_same_check_offsets(src, loff, , olen); - if (ret) - goto out_unlock; - ret = extent_same_check_offsets(src, dst_loff, , olen); - if (ret) - goto out_unlock; + ret = extent_same_check_offsets(src, loff, , olen); + if (ret) + goto out_unlock; + ret = extent_same_check_offsets(dst, dst_loff, , olen); + if (ret) + goto out_unlock; + + if (same_inode) { /* * Single inode case wants the same checks, except we * don't want our length pushed out past i_size as @@ -3174,16 +3175,6 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, same_lock_start = min_t(u64, loff, dst_loff); same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; - } else { - btrfs_double_inode_lock(src, dst); - - ret = extent_same_check_offsets(src, loff, , olen); - if (ret) - goto out_unlock; - - ret = extent_same_check_offsets(dst, dst_loff, , olen); - if (ret) - goto out_unlock; } /* don't make the dst file partly checksummed */ -- 2.11.0 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in 
the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: constify struct btrfs_{,disk_}key wherever possible
From: Omar SandovalIn a lot of places, it's unclear when it's safe to reuse a struct btrfs_key after it has been passed to a helper function. Constify these arguments wherever possible to make it obvious. Signed-off-by: Omar Sandoval --- This applies to Dave's for-next branch. If it's too intrusive of a change, it can wait, but I think it's a nice cleanup. fs/btrfs/ctree.c | 58 +--- fs/btrfs/ctree.h | 60 ++ fs/btrfs/extent-tree.c | 9 fs/btrfs/root-tree.c | 6 ++--- 4 files changed, 69 insertions(+), 64 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 146b2dc0d2cf..72dd200f0478 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -28,9 +28,9 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); -static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *ins_key, - struct btrfs_path *path, int data_size, int extend); +static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, + const struct btrfs_key *ins_key, struct btrfs_path *path, + int data_size, int extend); static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct extent_buffer *dst, @@ -1580,7 +1580,8 @@ static int close_blocks(u64 blocknr, u64 other, u32 blocksize) /* * compare two keys in a memcmp fashion */ -static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) +static int comp_keys(const struct btrfs_disk_key *disk, +const struct btrfs_key *k2) { struct btrfs_key k1; @@ -1592,7 +1593,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) /* * same as comp_keys only with two btrfs_key's */ -int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) +int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2) { if (k1->objectid > k2->objectid) return 1; @@ -1732,8 +1733,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, * slot may point to 
max if the key is bigger than all of the keys */ static noinline int generic_bin_search(struct extent_buffer *eb, - unsigned long p, - int item_size, struct btrfs_key *key, + unsigned long p, int item_size, + const struct btrfs_key *key, int max, int *slot) { int low = 0; @@ -1802,7 +1803,7 @@ static noinline int generic_bin_search(struct extent_buffer *eb, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, +static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key, int level, int *slot) { if (level == 0) @@ -1819,7 +1820,7 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, slot); } -int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, +int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, int level, int *slot) { return bin_search(eb, key, level, slot); @@ -2440,7 +2441,7 @@ static int read_block_for_search(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *p, struct extent_buffer **eb_ret, int level, int slot, - struct btrfs_key *key, u64 time_seq) + const struct btrfs_key *key, u64 time_seq) { struct btrfs_fs_info *fs_info = root->fs_info; u64 blocknr; @@ -2587,7 +2588,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, } static void key_search_validate(struct extent_buffer *b, - struct btrfs_key *key, + const struct btrfs_key *key, int level) { #ifdef CONFIG_BTRFS_ASSERT @@ -2606,7 +2607,7 @@ static void key_search_validate(struct extent_buffer *b, #endif } -static int key_search(struct extent_buffer *b, struct btrfs_key *key, +static int key_search(struct extent_buffer *b, const struct btrfs_key *key, int level, int *prev_cmp, int *slot) { if (*prev_cmp != 0) { @@ -2668,9 +2669,9 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, * tree. 
if ins_len < 0, nodes will be merged as we walk down the tree (if * possible) */ -int btrfs_search_slot(struct btrfs_trans_handle
gdb log of crashed "btrfs-image -s"
Christoph Groth wrote: Chris Murphy wrote: On Tue, Jan 17, 2017 at 1:25 PM, Christoph Groth wrote: Any ideas on what could be done? If you need help to debug the problem with btrfs-image, please tell me what I should do. I can keep the broken file system around until an image can be created at some later time. Try 4.9, or even 4.8.5, tons of bugs have been fixed since 4.7.3 although I don't know off hand if this particular bug is fixed. I did recently do a btrfs-image with btrfs-progs v4.9 with -s and did not get a segfault. I compiled btrfs-image.static from btrfs-tools 4.9 (from git) and started it from Debian testing's initramfs. The exact command that I use is: /mnt/btrfs-image.static -c3 -s /dev/sda2 /mnt/mim-s.bim It runs for a couple of seconds (enough to write 20263936 bytes of output) and then quits with *** Error in `/mnt/btrfs-image.static`: double free or corruption (!prev): 0x009f0940 *** == Backtrace: == [0x45fb97] [0x465442] [0x465c1e] [0x402694] [0x402dcb] [0x4031fe] [0x4050ff] [0x405783] [0x44cb73] [0x44cdfe] [0x400b2a] (I had to type the above off the other screen, but I double checked that there are no errors.) The executable that I used can be downloaded from http://groth.fr/btrfs-image.static Its md5sum is 48abbc82ac6d3c0cb88cba1e5edb85fd. I hope that this can help someone to see what's going on. I ran the same executable under gdb from a live system. The log is attached. btrfs-image.log Description: Binary data signature.asc Description: PGP signature
Re: [PATCH] xfstests: btrfs/047: check btrfs-convert with extent and non-extent source
On Wed, Jan 18, 2017 at 07:17:02AM +0530, Lakshmipathi.G wrote: > Signed-off-by: Lakshmipathi.GNeed detailed test description in commit log too. > --- > tests/btrfs/047 | 108 > > tests/btrfs/047.out | 1 + > tests/btrfs/group | 1 + > 3 files changed, 110 insertions(+) > create mode 100755 tests/btrfs/047 > create mode 100644 tests/btrfs/047.out > > diff --git a/tests/btrfs/047 b/tests/btrfs/047 > new file mode 100755 > index 000..0c4b2c7 > --- /dev/null > +++ b/tests/btrfs/047 > @@ -0,0 +1,108 @@ > +#! /bin/bash > +# FS QA Test 047 > +# > +# Test btrfs-convert > +# Trailing whitespace in above line. > +# 1) create ext3 filesystem & populate it. > +# 2) update ext3 filesystem to ext4. > +# 3) populate data. > +# 4) source has combination of non-extent and extent files. > +# 5) convert it btrfs, mount and verify contents. > +#--- > +# Copyright (c) 2017 Lakshmipathi.G All Rights Reserved. > +# > +# This program is free software; you can redistribute it and/or > +# modify it under the terms of the GNU General Public License as > +# published by the Free Software Foundation. > +# > +# This program is distributed in the hope that it would be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +# GNU General Public License for more details. > +# > +# You should have received a copy of the GNU General Public License > +# along with this program; if not, write the Free Software Foundation, > +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA > +#--- > +# > + > +seq=`basename $0` > +seqres=$RESULT_DIR/$seq > +echo "QA output created by $seq" > + > +here=`pwd` > +tmp=/tmp/$$ > +status=1 # failure is the default! > +trap "_cleanup; exit \$status" 0 1 2 3 15 > + > +_cleanup() > +{ > + cd / > + rm -f $tmp.* > +} > + > +# get standard environment, filters and checks > +. ./common/rc > +. 
./common/filter > + > +# remove previous $seqres.full before test > +rm -f $seqres.full > + > +# real QA test starts here > + > +# Modify as appropriate. > +_supported_fs btrfs > +_supported_os Linux > +_require_scratch_nocheck > + > +BTRFS_CONVERT_PROG="`set_prog_path btrfs-convert`" > +E2FSCK_PROG="`set_prog_path e2fsck`" > +TUNE2FS_PROG="`set_prog_path tune2fs`" These should go to common/config. Can you please update btrfs/012 as well to move such defines to common/config? > + > +_require_command "$BTRFS_CONVERT_PROG" btrfs-convert > +_require_command "$MKFS_EXT4_PROG" mkfs.ext4 > +_require_command "$E2FSCK_PROG" e2fsck > +_require_command "$TUNE2FS_PROG" tune2fs > + > +rm -f $seqres.full > + > +BLOCK_SIZE=`_get_block_size $TEST_DIR` > + > +# Create & populate an ext3 filesystem > +$MKFS_EXT4_PROG -t ext3 -b $BLOCK_SIZE $SCRATCH_DEV > $seqres.full 2>&1 || \ > + _notrun "Could not create ext3 filesystem" Better to add "-F" option to mkfs to force mkfs so it won't stop when there's an existing fs on SCRATCH_DEV. > + > +# mount and populate non-extent file > +mount -t ext3 $SCRATCH_DEV $SCRATCH_MNT > +dd if=/dev/urandom of=$SCRATCH_MNT/f1.txt bs=1MB count=10 >> $seqres.full > 2>&1 > +NON_EXTENT_MD5=`md5sum $SCRATCH_MNT/f1.txt | awk '{print $1}' ` Better to have different files with different file sizes and different types, e.g. run fsstress to create such a fs structure. > +_scratch_unmount > + > +# Upgrade it to ext4. > +$TUNE2FS_PROG -O extents,uninit_bg,dir_index $SCRATCH_DEV >> $seqres.full > 2>&1 > +$E2FSCK_PROG -fyD $SCRATCH_DEV >> $seqres.full 2>&1 Why is this e2fsck needed? Add some comments? Or it just can be removed? 
> + > +# mount and populate extent file > +mount -t ext4 $SCRATCH_DEV $SCRATCH_MNT > +dd if=/dev/urandom of=$SCRATCH_MNT/f2.txt bs=1MB count=10 >> $seqres.full > 2>&1 > +EXTENT_MD5=`md5sum $SCRATCH_MNT/f2.txt | awk '{print $1}'` > +_scratch_unmount > + > +# Convert non-extent & extent data to btrfs, mount it, verify the data > +$BTRFS_CONVERT_PROG $SCRATCH_DEV >> $seqres.full 2>&1 || \ > + _fail "btrfs-convert failed" > +_scratch_mount || _fail "Could not mount new btrfs fs" > + > +F1_MD5=`md5sum $SCRATCH_MNT/f1.txt | awk '{print $1}'` > +F2_MD5=`md5sum $SCRATCH_MNT/f2.txt | awk '{print $1}'` > +if [ $NON_EXTENT_MD5 != $F1_MD5 ] ; then Trailing whitespace in above line. > +_fail "ext3 file mismatch." No need to _fail, just echo this message to break golden image. And need indention inside "if-then-fi" > +fi > + > +if [ $EXTENT_MD5 != $F2_MD5 ] ; then Trailing whitespace. > +_fail "ext4 file mismatch." Same here. Use echo and indention. > +fi > + Trailing whitespace. > +# success, all done > +status=0 > +exit > diff --git a/tests/btrfs/047.out b/tests/btrfs/047.out > new file mode 100644 > index 000..58e2353 >
[PATCH] btrfs-progs: lowmem-check: Fix false alert on dropped leaf
For btrfs-progs test case 021-partially-dropped-snapshot-case, if the first leaf is already dropped, btrfs check low-memory mode will report a false alert: checking fs roots checksum verify failed on 29917184 found E4E3BDB6 wanted checksum verify failed on 29917184 found E4E3BDB6 wanted checksum verify failed on 29917184 found E4E3BDB6 wanted checksum verify failed on 29917184 found E4E3BDB6 wanted This is caused by the call to check_fs_first_inode(): unlike the rest of check_fs_root_v2(), it does not check the dropping progress sufficiently, which causes the false alert. Fix it by checking the dropping progress before searching for the slot. Signed-off-by: Qu Wenruo --- cmds-check.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cmds-check.c b/cmds-check.c index 1dba2985..25247fd9 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -4939,11 +4939,18 @@ static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref) int err = 0; int ret; - btrfs_init_path(&path); key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; + /* For root being dropped, we don't need to check first inode */ + if (btrfs_root_refs(&root->root_item) == 0 && + btrfs_disk_key_objectid(&root->root_item.drop_progress) >= + key.objectid) + return 0; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret < 0) goto out; -- 2.11.0 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3] btrfs-progs: Fix disable backtrace assert error
Due to commit 00e769d04c2c83029d6c71 (btrfs-progs: Correct value printed by assertions/BUG_ON/WARN_ON), which changed the assert_trace() parameter, the conditions passed to assert/WARN_ON/BUG_ON are logically negated between the backtrace-enabled and backtrace-disabled cases. Such behavior makes it easy to pass the wrong value, and in fact it did cause us to pass the wrong condition for ASSERT(). Instead of passing different conditions for ASSERT/WARN_ON/BUG_ON() manually, this patch uses ASSERT() to implement the remaining ASSERT/WARN_ON/BUG(), so we don't need to pass 3 different conditions but only one. Also, move WARN_ON() out of the ifdef branch, as it's completely the same for both branches. Cc: Goldwyn Rodrigues Signed-off-by: Qu Wenruo --- Sorry for the late update; I have been digging into the dev-replace/scrub bug. v2: Keep ASSERT() outputting a meaningful error string, use ASSERT() to implement BUG_ON() so only the abused BUG_ON() output is affected. Suggested by David. v3: Update commit message, since we use ASSERT() instead of BUG_ON() as main assert function now. 
--- kerncompat.h | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kerncompat.h b/kerncompat.h index 19ed3fc0..fe23774e 100644 --- a/kerncompat.h +++ b/kerncompat.h @@ -291,18 +291,15 @@ static inline void assert_trace(const char *assertion, const char *filename, abort(); exit(1); } - -#define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) -#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) #defineASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__, (long)!(c)) -#define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 1) #else -#define BUG_ON(c) assert(!(c)) -#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) -#define ASSERT(c) assert(!(c)) -#define BUG() assert(0) +#define ASSERT(c) assert(c) #endif +#define BUG_ON(c) ASSERT(!(c)) +#define BUG() BUG_ON(1) +#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) + #define container_of(ptr, type, member) ({ \ const typeof( ((type *)0)->member ) *__mptr = (ptr);\ (type *)( (char *)__mptr - offsetof(type,member) );}) -- 2.11.0 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] btrfs-progs: Fix disable backtrace assert error
Due to commit 00e769d04c2c83029d6c71 (btrfs-progs: Correct value printed by assertions/BUG_ON/WARN_ON), which changed the assert_trace() parameter, the conditions passed to assert/WARN_ON/BUG_ON are logically negated between the backtrace-enabled and backtrace-disabled cases. Such behavior makes it easy to pass the wrong value, and in fact it did cause us to pass the wrong condition for ASSERT(). Instead of passing different conditions for ASSERT/WARN_ON/BUG_ON() manually, this patch uses BUG_ON() to implement the remaining ASSERT/WARN_ON/BUG(), so we don't need to pass 3 different conditions but only one. And to further inform reviewers that the condition should be different, rename "assert_trace" to "bugon_trace", as unlike assert, we will only trigger the bug when the condition is true. Also, move WARN_ON() out of the ifdef branch, as it's completely the same for both branches. Cc: Goldwyn Rodrigues Signed-off-by: Qu Wenruo --- Sorry for the late update; I have been digging into the dev-replace/scrub bug. v2: Keep ASSERT() outputting a meaningful error string, use ASSERT() to implement BUG_ON() so only the abused BUG_ON() output is affected. Suggested by David. 
--- kerncompat.h | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kerncompat.h b/kerncompat.h index 19ed3fc0..fe23774e 100644 --- a/kerncompat.h +++ b/kerncompat.h @@ -291,18 +291,15 @@ static inline void assert_trace(const char *assertion, const char *filename, abort(); exit(1); } - -#define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) -#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) #defineASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__, (long)!(c)) -#define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 1) #else -#define BUG_ON(c) assert(!(c)) -#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) -#define ASSERT(c) assert(!(c)) -#define BUG() assert(0) +#define ASSERT(c) assert(c) #endif +#define BUG_ON(c) ASSERT(!(c)) +#define BUG() BUG_ON(1) +#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) + #define container_of(ptr, type, member) ({ \ const typeof( ((type *)0)->member ) *__mptr = (ptr);\ (type *)( (char *)__mptr - offsetof(type,member) );}) -- 2.11.0 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] btrfs-progs: Fix disable backtrace assert error
Due to commit 00e769d04c2c83029d6c71 (btrfs-progs: Correct value printed by assertions/BUG_ON/WARN_ON), which changed the assert_trace() parameter, the conditions passed to assert/WARN_ON/BUG_ON are logically negated between the backtrace-enabled and backtrace-disabled cases. Such behavior makes it easy to pass the wrong value, and in fact it did cause us to pass the wrong condition for ASSERT(). Instead of passing different conditions for ASSERT/WARN_ON/BUG_ON() manually, this patch uses BUG_ON() to implement the remaining ASSERT/WARN_ON/BUG(), so we don't need to pass 3 different conditions but only one. And to further inform reviewers that the condition should be different, rename "assert_trace" to "bugon_trace", as unlike assert, we will only trigger the bug when the condition is true. Also, move WARN_ON() out of the ifdef branch, as it's completely the same for both branches. Cc: Goldwyn Rodrigues Signed-off-by: Qu Wenruo --- Sorry for the late update; I have been digging into the dev-replace/scrub bug. v2: Keep ASSERT() outputting a meaningful error string, use ASSERT() to implement BUG_ON() so only the abused BUG_ON() output is affected. Suggested by David. 
--- kerncompat.h | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kerncompat.h b/kerncompat.h index 19ed3fc0..fe23774e 100644 --- a/kerncompat.h +++ b/kerncompat.h @@ -291,18 +291,15 @@ static inline void assert_trace(const char *assertion, const char *filename, abort(); exit(1); } - -#define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) -#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) #defineASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__, (long)!(c)) -#define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 1) #else -#define BUG_ON(c) assert(!(c)) -#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) -#define ASSERT(c) assert(!(c)) -#define BUG() assert(0) +#define ASSERT(c) assert(c) #endif +#define BUG_ON(c) ASSERT(!(c)) +#define BUG() BUG_ON(1) +#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c)) + #define container_of(ptr, type, member) ({ \ const typeof( ((type *)0)->member ) *__mptr = (ptr);\ (type *)( (char *)__mptr - offsetof(type,member) );}) -- 2.11.0 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] xfstests: btrfs/047: check btrfs-convert with extent and non-extent source
Signed-off-by: Lakshmipathi.G--- tests/btrfs/047 | 108 tests/btrfs/047.out | 1 + tests/btrfs/group | 1 + 3 files changed, 110 insertions(+) create mode 100755 tests/btrfs/047 create mode 100644 tests/btrfs/047.out diff --git a/tests/btrfs/047 b/tests/btrfs/047 new file mode 100755 index 000..0c4b2c7 --- /dev/null +++ b/tests/btrfs/047 @@ -0,0 +1,108 @@ +#! /bin/bash +# FS QA Test 047 +# +# Test btrfs-convert +# +# 1) create ext3 filesystem & populate it. +# 2) update ext3 filesystem to ext4. +# 3) populate data. +# 4) source has combination of non-extent and extent files. +# 5) convert it btrfs, mount and verify contents. +#--- +# Copyright (c) 2017 Lakshmipathi.G All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#--- +# + +seq=`basename $0` +seqres=$RESULT_DIR/$seq +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! +trap "_cleanup; exit \$status" 0 1 2 3 15 + +_cleanup() +{ + cd / + rm -f $tmp.* +} + +# get standard environment, filters and checks +. ./common/rc +. ./common/filter + +# remove previous $seqres.full before test +rm -f $seqres.full + +# real QA test starts here + +# Modify as appropriate. 
+_supported_fs btrfs +_supported_os Linux +_require_scratch_nocheck + +BTRFS_CONVERT_PROG="`set_prog_path btrfs-convert`" +E2FSCK_PROG="`set_prog_path e2fsck`" +TUNE2FS_PROG="`set_prog_path tune2fs`" + +_require_command "$BTRFS_CONVERT_PROG" btrfs-convert +_require_command "$MKFS_EXT4_PROG" mkfs.ext4 +_require_command "$E2FSCK_PROG" e2fsck +_require_command "$TUNE2FS_PROG" tune2fs + +rm -f $seqres.full + +BLOCK_SIZE=`_get_block_size $TEST_DIR` + +# Create & populate an ext3 filesystem +$MKFS_EXT4_PROG -t ext3 -b $BLOCK_SIZE $SCRATCH_DEV > $seqres.full 2>&1 || \ + _notrun "Could not create ext3 filesystem" + +# mount and populate non-extent file +mount -t ext3 $SCRATCH_DEV $SCRATCH_MNT +dd if=/dev/urandom of=$SCRATCH_MNT/f1.txt bs=1MB count=10 >> $seqres.full 2>&1 +NON_EXTENT_MD5=`md5sum $SCRATCH_MNT/f1.txt | awk '{print $1}' ` +_scratch_unmount + +# Upgrade it to ext4. +$TUNE2FS_PROG -O extents,uninit_bg,dir_index $SCRATCH_DEV >> $seqres.full 2>&1 +$E2FSCK_PROG -fyD $SCRATCH_DEV >> $seqres.full 2>&1 + +# mount and populate extent file +mount -t ext4 $SCRATCH_DEV $SCRATCH_MNT +dd if=/dev/urandom of=$SCRATCH_MNT/f2.txt bs=1MB count=10 >> $seqres.full 2>&1 +EXTENT_MD5=`md5sum $SCRATCH_MNT/f2.txt | awk '{print $1}'` +_scratch_unmount + +# Convert non-extent & extent data to btrfs, mount it, verify the data +$BTRFS_CONVERT_PROG $SCRATCH_DEV >> $seqres.full 2>&1 || \ + _fail "btrfs-convert failed" +_scratch_mount || _fail "Could not mount new btrfs fs" + +F1_MD5=`md5sum $SCRATCH_MNT/f1.txt | awk '{print $1}'` +F2_MD5=`md5sum $SCRATCH_MNT/f2.txt | awk '{print $1}'` +if [ $NON_EXTENT_MD5 != $F1_MD5 ] ; then +_fail "ext3 file mismatch." +fi + +if [ $EXTENT_MD5 != $F2_MD5 ] ; then +_fail "ext4 file mismatch." 
+fi + +# success, all done +status=0 +exit diff --git a/tests/btrfs/047.out b/tests/btrfs/047.out new file mode 100644 index 000..58e2353 --- /dev/null +++ b/tests/btrfs/047.out @@ -0,0 +1 @@ +QA output created by 047 diff --git a/tests/btrfs/group b/tests/btrfs/group index 3fbf706..224a082 100644 --- a/tests/btrfs/group +++ b/tests/btrfs/group @@ -49,6 +49,7 @@ 044 auto quick send 045 auto quick send 046 auto quick send +047 auto convert 048 auto quick 049 auto quick 050 auto quick send -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] btrfs-progs: quota: fix printing during wait mode
From: Jeff MahoneyIf we call "btrfs quota rescan -w", it will attempt to start the rescan operation, wait for it, and then print the "quota rescan started" message. The wait could last an arbitrary amount of time, so printing it after the wait isn't very helpful. This patch reworks how we print the rescan started message as well as the printing of the messages, including adding an error message for status query failures (which could be EPERM/EFAULT/ENOMEM, not just no rescan in progress) and wait failures. Signed-off-by: Jeff Mahoney --- cmds-quota.c | 40 +++- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/cmds-quota.c b/cmds-quota.c index 75c032b..f9b422d 100644 --- a/cmds-quota.c +++ b/cmds-quota.c @@ -154,28 +154,42 @@ static int cmd_quota_rescan(int argc, char **argv) ret = ioctl(fd, ioctlnum, ); e = errno; - if (wait_for_completion && (ret == 0 || e == EINPROGRESS)) { - ret = ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT, ); - e = errno; - } - close_file_or_dir(fd, dirstream); - - if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN) { + if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN_STATUS) { + close_file_or_dir(fd, dirstream); if (ret < 0) { - error("quota rescan failed: %s", strerror(e)); + error("could not obtain quota rescan status: %s", + strerror(e)); return 1; - } else { - printf("quota rescan started\n"); } - } else { - if (!args.flags) { + if (!args.flags) printf("no rescan operation in progress\n"); - } else { + else printf("rescan operation running (current key %lld)\n", args.progress); + return 0; + } + + if (ret == 0) { + printf("quota rescan started\n"); + fflush(stdout); + } else if (ret < 0 && (!wait_for_completion || e != EINPROGRESS)) { + error("quota rescan failed: %s", strerror(e)); + close_file_or_dir(fd, dirstream); + return 1; + } + + if (wait_for_completion) { + ret = ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT, ); + e = errno; + if (ret < 0) { + error("quota rescan wait failed: %s", + strerror(e)); + close_file_or_dir(fd, dirstream); + return 1; } } + 
close_file_or_dir(fd, dirstream); return 0; } -- 2.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] btrfs-progs: quota: Add -W option to rescan to wait without starting rescan
From: Jeff Mahoney This patch adds a new -W option to wait for a rescan without starting a new operation. This is useful for things like xfstests where we want to do a "btrfs quota enable" and not continue until the subsequent rescan has finished. In addition to documenting the new option in the man page, I've cleaned up the rescan entry to document the -w option a bit better. Signed-off-by: Jeff Mahoney --- Documentation/btrfs-quota.asciidoc | 10 +++--- cmds-quota.c | 21 +++-- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Documentation/btrfs-quota.asciidoc b/Documentation/btrfs-quota.asciidoc index 33c3bfd..7b29a97 100644 --- a/Documentation/btrfs-quota.asciidoc +++ b/Documentation/btrfs-quota.asciidoc @@ -222,15 +222,19 @@ Disable subvolume quota support for a filesystem. *enable* :: Enable subvolume quota support for a filesystem. -*rescan* [-s] :: +*rescan* [-s|-w|-W] :: Trash all qgroup numbers and scan the metadata again with the current config. + `Options` + -s -show status of a running rescan operation. +Show status of a running rescan operation. + -w -wait for rescan operation to finish(can be already in progress). +Start rescan operation and wait until it has finished before exiting. If a rescan is already running, wait until it finishes and then exit without starting a new one. + +-W +Wait for rescan operation to finish and then exit. If a rescan is not already running, exit silently. 
EXIT STATUS --- diff --git a/cmds-quota.c b/cmds-quota.c index f9b422d..a6df839 100644 --- a/cmds-quota.c +++ b/cmds-quota.c @@ -121,14 +121,20 @@ static int cmd_quota_rescan(int argc, char **argv) int wait_for_completion = 0; while (1) { - int c = getopt(argc, argv, "sw"); + int c = getopt(argc, argv, "swW"); if (c < 0) break; switch (c) { case 's': ioctlnum = BTRFS_IOC_QUOTA_RESCAN_STATUS; break; + case 'W': + ioctlnum = 0; + wait_for_completion = 1; + break; case 'w': + /* Reset it in case the user did both -W and -w */ + ioctlnum = BTRFS_IOC_QUOTA_RESCAN; wait_for_completion = 1; break; default: @@ -136,8 +142,9 @@ static int cmd_quota_rescan(int argc, char **argv) } } - if (ioctlnum != BTRFS_IOC_QUOTA_RESCAN && wait_for_completion) { - error("switch -w cannot be used with -s"); + if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN_STATUS && wait_for_completion) { + error("switch -%c cannot be used with -s", + ioctlnum ? 'w' : 'W'); return 1; } @@ -151,8 +158,10 @@ static int cmd_quota_rescan(int argc, char **argv) if (fd < 0) return 1; - ret = ioctl(fd, ioctlnum, ); - e = errno; + if (ioctlnum) { + ret = ioctl(fd, ioctlnum, ); + e = errno; + } if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN_STATUS) { close_file_or_dir(fd, dirstream); @@ -169,7 +178,7 @@ static int cmd_quota_rescan(int argc, char **argv) return 0; } - if (ret == 0) { + if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN && ret == 0) { printf("quota rescan started\n"); fflush(stdout); } else if (ret < 0 && (!wait_for_completion || e != EINPROGRESS)) { -- 2.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: corruption: yet another one after deleting a ro snapshot
On Wed, 2017-01-18 at 08:41 +0800, Qu Wenruo wrote: > Since we have your extent tree and root tree dump, I think we should > be > able to build a image to reproduce the case. +1 > BTW, your fs is too large for us to really do some verification or > other > work. Sure I know... but that's simply the one which I work the most with and where I stumble over such things. I have e.g. a smaller one (well still 1TB in total 500GB used) which is the root-fs from my notebook... but not really any issues with that so far ^^ Cheers, Chris. smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH 2/2] btrfs: replace: Use ref counts to avoid destroying target device when canceled
At 01/18/2017 06:47 AM, Josef Bacik wrote: On Mon, Jan 16, 2017 at 5:10 PM, Qu Wenruowrote: When dev-replace and scrub are run at the same time, dev-replace can be canceled by scrub. It's quite common for btrfs/069. While in that case, target device can be destroyed at cancel time, leading to a user-after-free bug: Process A (dev-replace) | Process B(scrub) -- |(Any RW is OK) |scrub_setup_recheck_block() ||- btrfs_map_sblock() | Got a bbio with tgtdev btrfs_dev_replace_finishing()| |- btrfs_destory_dev_replace_tgtdev()| |- call_rcu(free_device) | |- __free_device() | |- kfree(device)| | Scrub worker: | Access bbio->stripes[], which | contains tgtdev. | This triggers general protection. The bug is mostly obvious for RAID5/6 since raid56 choose to keep old rbio and rbio->bbio for later steal, this hugely enlarged the race window and makes it much easier to trigger the bug. This patch introduces 'tgtdev_refs' and 'tgtdev_wait' for btrfs_device to wait for all its user released the target device. Signed-off-by: Qu Wenruo --- fs/btrfs/dev-replace.c | 7 ++- fs/btrfs/volumes.c | 36 +++- fs/btrfs/volumes.h | 10 ++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5de280b9ad73..794a6a0bedf2 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -558,7 +558,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, rcu_str_deref(src_device->name), src_device->devid, rcu_str_deref(tgt_device->name)); -tgt_device->is_tgtdev_for_dev_replace = 0; tgt_device->devid = src_device->devid; src_device->devid = BTRFS_DEV_REPLACE_DEVID; memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); @@ -579,6 +578,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_dev_replace_unlock(dev_replace, 1); +/* + * Only change is_tgtdev_for_dev_replace flag after all its + * users get released. 
+ */ +wait_target_device(tgt_device); +tgt_device->is_tgtdev_for_dev_replace = 0; btrfs_rm_dev_replace_blocked(fs_info); btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bb8592e1a364..74a6ee981b78 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2064,6 +2064,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, WARN_ON(!tgtdev); mutex_lock(_info->fs_devices->device_list_mutex); +wait_target_device(tgtdev); btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); if (tgtdev->bdev) @@ -2598,6 +2599,8 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, device->is_tgtdev_for_dev_replace = 1; device->mode = FMODE_EXCL; device->dev_stats_valid = 1; +atomic_set(>tgtdev_refs, 0); +init_waitqueue_head(>tgtdev_wait); set_blocksize(device->bdev, 4096); device->fs_devices = fs_info->fs_devices; list_add(>dev_list, _info->fs_devices->devices); @@ -2624,6 +2627,8 @@ void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, tgtdev->sector_size = sectorsize; tgtdev->fs_info = fs_info; tgtdev->in_fs_metadata = 1; +atomic_set(>tgtdev_refs, 0); +init_waitqueue_head(>tgtdev_wait); } static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, @@ -5302,6 +5307,32 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) return bbio; } +static void pin_bbio_target_device(struct btrfs_bio *bbio) +{ +int i; + +for (i = 0; i < bbio->num_stripes; i++) { +struct btrfs_device *device = bbio->stripes[i].dev; + +if (device->is_tgtdev_for_dev_replace) +atomic_inc(>tgtdev_refs); +} +} Can we just do this at the map time? So when we add a new stripe we go ahead and take the ref then, and the same at complete time? Thanks, Josef Thanks for the review. But I'm not quite sure what you mean here. This pin_bbio_target_device() is called inside the dev_replace lock protection, so it's called at map time. 
And we must call it after patching mirror inside __btrfs_map_block(), so I can't find a better timing to call it. Would you please give me some hint? Thanks, Qu -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org
Re: corruption: yet another one after deleting a ro snapshot
At 01/17/2017 06:39 PM, Christoph Anton Mitterer wrote: Am 17. Januar 2017 09:53:19 MEZ schrieb Qu Wenruo: Just a lowmem false alert, as the extent-tree dump shows a completely fine result. I'll CC you and add your Reported-by tag when there is any update on this case. Fine, just one more thing from my side on this issue: Do you want me to leave the fs untouched until I could verify a lowmem mode fix? Or is it ok to go on using it (and running backups on it)? Cheers, Chris. Since we have your extent tree and root tree dump, I think we should be able to build an image to reproduce the case. So you're OK to go on using it. BTW, your fs is too large for us to really do some verification or other work. Thanks, Qu -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Uncorrectable errors with RAID1
Goldwyn Rodrigues wrote: As Chris mentioned, try a later version. If you are familiar with git, you could even try the devel version. Looking at the commits in current devel (2f4a73f9a612876116) since v4.9, there doesn't seem to be anything relevant, but I can retry if you think it's worth it. signature.asc Description: PGP signature
Re: Uncorrectable errors with RAID1
Chris Murphy wrote: On Tue, Jan 17, 2017 at 1:25 PM, Christoph Groth wrote: Any ideas on what could be done? If you need help to debug the problem with btrfs-image, please tell me what I should do. I can keep the broken file system around until an image can be created at some later time. Try 4.9, or even 4.8.5, tons of bugs have been fixed since 4.7.3 although I don't know off hand if this particular bug is fixed. I did recently do a btrfs-image with btrfs-progs v4.9 with -s and did not get a segfault. I compiled btrfs-image.static from btrfs-tools 4.9 (from git) and started it from Debian testing's initramfs. The exact command that I use is: /mnt/btrfs-image.static -c3 -s /dev/sda2 /mnt/mim-s.bim It runs for a couple of seconds (enough to write 20263936 bytes of output) and then quits with *** Error in `/mnt/btrfs-image.static`: double free or corruption (!prev): 0x009f0940 *** == Backtrace: == [0x45fb97] [0x465442] [0x465c1e] [0x402694] [0x402dcb] [0x4031fe] [0x4050ff] [0x405783] [0x44cb73] [0x44cdfe] [0x400b2a] (I had to type the above off the other screen, but I double checked that there are no errors.) The executable that I used can be downloaded from http://groth.fr/btrfs-image.static Its md5sum is 48abbc82ac6d3c0cb88cba1e5edb85fd. I hope that this can help someone to see what's going on. signature.asc Description: PGP signature
Re: Uncorrectable errors with RAID1
On 01/17/2017 02:25 PM, Christoph Groth wrote: > Goldwyn Rodrigues wrote: >> On 01/17/2017 02:44 AM, Christoph Groth wrote: >>> Goldwyn Rodrigues wrote: >>> Would you be able to upload a btrfs-image for me to examine. This is a core ctree error where most probably item size is incorrectly registered. >>> >>> Sure, I can do that. I'd like to use the -s option, will this be fine? >> >> Yes, I think that should be fine. > > Unfortunately, giving -s causes btrfs-image to segfault. I tried both > btrfs-progs 4.7.3 and 4.4. I also tried different compression levels. > > Without -s it works, but since this file system contains the complete > digital life of our family, I would rather not share even the file names. > > Any ideas on what could be done? If you need help to debug the problem > with btrfs-image, please tell me what I should do. I can keep the > broken file system around until an image can be created at some later time. As Chris mentioned, try a later version. If you are familiar with git, you could even try the devel version. -- Goldwyn signature.asc Description: OpenPGP digital signature
Re: [PATCH 2/2] btrfs: replace: Use ref counts to avoid destroying target device when canceled
On Mon, Jan 16, 2017 at 5:10 PM, Qu Wenruowrote: When dev-replace and scrub are run at the same time, dev-replace can be canceled by scrub. It's quite common for btrfs/069. While in that case, target device can be destroyed at cancel time, leading to a user-after-free bug: Process A (dev-replace) | Process B(scrub) -- |(Any RW is OK) |scrub_setup_recheck_block() ||- btrfs_map_sblock() | Got a bbio with tgtdev btrfs_dev_replace_finishing()| |- btrfs_destory_dev_replace_tgtdev()| |- call_rcu(free_device) | |- __free_device() | |- kfree(device)| | Scrub worker: | Access bbio->stripes[], which | contains tgtdev. | This triggers general protection. The bug is mostly obvious for RAID5/6 since raid56 choose to keep old rbio and rbio->bbio for later steal, this hugely enlarged the race window and makes it much easier to trigger the bug. This patch introduces 'tgtdev_refs' and 'tgtdev_wait' for btrfs_device to wait for all its user released the target device. Signed-off-by: Qu Wenruo --- fs/btrfs/dev-replace.c | 7 ++- fs/btrfs/volumes.c | 36 +++- fs/btrfs/volumes.h | 10 ++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5de280b9ad73..794a6a0bedf2 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -558,7 +558,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, rcu_str_deref(src_device->name), src_device->devid, rcu_str_deref(tgt_device->name)); - tgt_device->is_tgtdev_for_dev_replace = 0; tgt_device->devid = src_device->devid; src_device->devid = BTRFS_DEV_REPLACE_DEVID; memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); @@ -579,6 +578,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_dev_replace_unlock(dev_replace, 1); + /* +* Only change is_tgtdev_for_dev_replace flag after all its +* users get released. 
+*/ + wait_target_device(tgt_device); + tgt_device->is_tgtdev_for_dev_replace = 0; btrfs_rm_dev_replace_blocked(fs_info); btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bb8592e1a364..74a6ee981b78 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2064,6 +2064,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, WARN_ON(!tgtdev); mutex_lock(_info->fs_devices->device_list_mutex); + wait_target_device(tgtdev); btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); if (tgtdev->bdev) @@ -2598,6 +2599,8 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, device->is_tgtdev_for_dev_replace = 1; device->mode = FMODE_EXCL; device->dev_stats_valid = 1; + atomic_set(>tgtdev_refs, 0); + init_waitqueue_head(>tgtdev_wait); set_blocksize(device->bdev, 4096); device->fs_devices = fs_info->fs_devices; list_add(>dev_list, _info->fs_devices->devices); @@ -2624,6 +2627,8 @@ void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, tgtdev->sector_size = sectorsize; tgtdev->fs_info = fs_info; tgtdev->in_fs_metadata = 1; + atomic_set(>tgtdev_refs, 0); + init_waitqueue_head(>tgtdev_wait); } static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, @@ -5302,6 +5307,32 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) return bbio; } +static void pin_bbio_target_device(struct btrfs_bio *bbio) +{ + int i; + + for (i = 0; i < bbio->num_stripes; i++) { + struct btrfs_device *device = bbio->stripes[i].dev; + + if (device->is_tgtdev_for_dev_replace) + atomic_inc(>tgtdev_refs); + } +} Can we just do this at the map time? So when we add a new stripe we go ahead and take the ref then, and the same at complete time? Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 06/24] btrfs: Make btrfs_del_dir_entries_in_log take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/inode.c| 2 +- fs/btrfs/tree-log.c | 10 +- fs/btrfs/tree-log.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 41b1e2ed63b4..ebfeabafe1b1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4075,7 +4075,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, } ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, - dir, index); + BTRFS_I(dir), index); if (ret == -ENOENT) ret = 0; else if (ret) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index df822908f2be..caa8d886b4ae 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3084,7 +3084,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, -struct inode *dir, u64 index) +struct btrfs_inode *dir, u64 index) { struct btrfs_root *log; struct btrfs_dir_item *di; @@ -3092,16 +3092,16 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, int ret; int err = 0; int bytes_del = 0; - u64 dir_ino = btrfs_ino(BTRFS_I(dir)); + u64 dir_ino = btrfs_ino(dir); - if (BTRFS_I(dir)->logged_trans < trans->transid) + if (dir->logged_trans < trans->transid) return 0; ret = join_running_log_trans(root); if (ret) return 0; - mutex_lock(_I(dir)->log_mutex); + mutex_lock(>log_mutex); log = root->log_root; path = btrfs_alloc_path(); @@ -3176,7 +3176,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, fail: btrfs_free_path(path); out_unlock: - mutex_unlock(_I(dir)->log_mutex); + mutex_unlock(>log_mutex); if (ret == -ENOSPC) { btrfs_set_log_full_commit(root->fs_info, trans); ret = 0; diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 2bcbac7efa9c..6c2b316b28e0 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -72,7 +72,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, int 
btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, -struct inode *dir, u64 index); +struct btrfs_inode *dir, u64 index); int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 17/24] btrfs: Make log_new_dir_dentries take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/tree-log.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 38cda7869bf9..b0cc56fe86e9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5155,7 +5155,7 @@ struct btrfs_dir_list { */ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *start_inode, + struct btrfs_inode *start_inode, struct btrfs_log_ctx *ctx) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -5174,7 +5174,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, btrfs_free_path(path); return -ENOMEM; } - dir_elem->ino = btrfs_ino(BTRFS_I(start_inode)); + dir_elem->ino = btrfs_ino(start_inode); list_add_tail(_elem->list, _list); while (!list_empty(_list)) { @@ -5368,7 +5368,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, ret = 1; if (!ret && ctx && ctx->log_new_dentries) ret = log_new_dir_dentries(trans, root, - dir_inode, ctx); + BTRFS_I(dir_inode), ctx); iput(dir_inode); if (ret) goto out; @@ -5531,7 +5531,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, old_parent = parent; } if (log_dentries) - ret = log_new_dir_dentries(trans, root, orig_inode, ctx); + ret = log_new_dir_dentries(trans, root, BTRFS_I(orig_inode), ctx); else ret = 0; end_trans: -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 22/24] btrfs: Make btrfs_log_inode take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/tree-log.c | 92 ++--- 1 file changed, 45 insertions(+), 47 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1348ab5e3229..8c110d0e16c3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -97,7 +97,7 @@ #define LOG_WALK_REPLAY_ALL 3 static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, struct btrfs_inode *inode, int inode_only, const loff_t start, const loff_t end, @@ -4589,7 +4589,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, * This handles both files and directories. */ static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, struct btrfs_inode *inode, int inode_only, const loff_t start, const loff_t end, @@ -4610,8 +4610,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, int ins_start_slot = 0; int ins_nr; bool fast_search = false; - u64 ino = btrfs_ino(BTRFS_I(inode)); - struct extent_map_tree *em_tree = _I(inode)->extent_tree; + u64 ino = btrfs_ino(inode); + struct extent_map_tree *em_tree = >extent_tree; u64 logged_isize = 0; bool need_log_inode_item = true; @@ -4632,10 +4632,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, /* today the code can only do partial logging of directories */ - if (S_ISDIR(inode->i_mode) || + if (S_ISDIR(inode->vfs_inode.i_mode) || (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - _I(inode)->runtime_flags) && -inode_only >= LOG_INODE_EXISTS)) + >runtime_flags) && +inode_only == LOG_INODE_EXISTS)) max_key.type = BTRFS_XATTR_ITEM_KEY; else max_key.type = (u8)-1; @@ -4647,11 +4647,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * order for the log replay code to mark inodes for link count * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items). 
*/ - if (S_ISDIR(inode->i_mode) || - BTRFS_I(inode)->generation > fs_info->last_trans_committed) - ret = btrfs_commit_inode_delayed_items(trans, BTRFS_I(inode)); + if (S_ISDIR(inode->vfs_inode.i_mode) || + inode->generation > fs_info->last_trans_committed) + ret = btrfs_commit_inode_delayed_items(trans, inode); else - ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode)); + ret = btrfs_commit_inode_delayed_inode(inode); if (ret) { btrfs_free_path(path); @@ -4661,17 +4661,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (inode_only == LOG_OTHER_INODE) { inode_only = LOG_INODE_EXISTS; - mutex_lock_nested(_I(inode)->log_mutex, - SINGLE_DEPTH_NESTING); + mutex_lock_nested(>log_mutex, SINGLE_DEPTH_NESTING); } else { - mutex_lock(_I(inode)->log_mutex); + mutex_lock(>log_mutex); } /* * a brute force approach to making sure we get the most uptodate * copies of everything. */ - if (S_ISDIR(inode->i_mode)) { + if (S_ISDIR(inode->vfs_inode.i_mode)) { int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; if (inode_only == LOG_INODE_EXISTS) @@ -4692,31 +4691,30 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * (zeroes), as if an expanding truncate happened, * instead of getting a file of 4Kb only. */ - err = logged_inode_size(log, BTRFS_I(inode), path, - _isize); + err = logged_inode_size(log, inode, path, _isize); if (err) goto out_unlock; } if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, -_I(inode)->runtime_flags)) { +>runtime_flags)) { if (inode_only == LOG_INODE_EXISTS) { max_key.type = BTRFS_XATTR_ITEM_KEY; ret = drop_objectid_items(trans, log, path, ino, max_key.type); } else {
[PATCHv2 11/24] btrfs: Make btrfs_log_all_xattrs take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/tree-log.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 12872bf492bd..1301c517c2f0 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4277,13 +4277,13 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, */ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *inode, + struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_path *dst_path) { int ret; struct btrfs_key key; - const u64 ino = btrfs_ino(BTRFS_I(inode)); + const u64 ino = btrfs_ino(inode); int ins_nr = 0; int start_slot = 0; @@ -4304,7 +4304,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, if (ins_nr > 0) { u64 last_extent = 0; - ret = copy_items(trans, BTRFS_I(inode), dst_path, path, + ret = copy_items(trans, inode, dst_path, path, _extent, start_slot, ins_nr, 1, 0); /* can't be 1, extent items aren't processed */ @@ -4334,7 +4334,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, if (ins_nr > 0) { u64 last_extent = 0; - ret = copy_items(trans, BTRFS_I(inode), dst_path, path, + ret = copy_items(trans, inode, dst_path, path, _extent, start_slot, ins_nr, 1, 0); /* can't be 1, extent items aren't processed */ @@ -4919,7 +4919,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); + err = btrfs_log_all_xattrs(trans, root, BTRFS_I(inode), path, dst_path); if (err) goto out_unlock; if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 04/24] btrfs: Make btrfs_inode_in_log take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/btrfs_inode.h | 16 +++- fs/btrfs/file.c| 2 +- fs/btrfs/inode.c | 16 fs/btrfs/tree-log.c| 4 ++-- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 4fed080545c6..b2dde0efebc0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -255,16 +255,14 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode) return false; } -static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) +static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) { int ret = 0; - spin_lock(_I(inode)->lock); - if (BTRFS_I(inode)->logged_trans == generation && - BTRFS_I(inode)->last_sub_trans <= - BTRFS_I(inode)->last_log_commit && - BTRFS_I(inode)->last_sub_trans <= - BTRFS_I(inode)->root->last_log_commit) { + spin_lock(>lock); + if (inode->logged_trans == generation && + inode->last_sub_trans <= inode->last_log_commit && + inode->last_sub_trans <= inode->root->last_log_commit) { /* * After a ranged fsync we might have left some extent maps * (that fall outside the fsync's range). So return false @@ -272,10 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) * will be called and process those extent maps. */ smp_mb(); - if (list_empty(_I(inode)->extent_tree.modified_extents)) + if (list_empty(>extent_tree.modified_extents)) ret = 1; } - spin_unlock(_I(inode)->lock); + spin_unlock(>lock); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0d32f45cef28..149b79b3aaf8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2062,7 +2062,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * commit does not start nor waits for ordered extents to complete. 
*/ smp_mb(); - if (btrfs_inode_in_log(inode, fs_info->generation) || + if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || (full_sync && BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) || (!btrfs_have_ordered_extents_in_range(inode, start, len) && diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a8374f1d8c61..9442c80fe551 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9683,11 +9683,11 @@ static int btrfs_rename_exchange(struct inode *old_dir, * allow the tasks to sync it. */ if (ret && (root_log_pinned || dest_log_pinned)) { - if (btrfs_inode_in_log(old_dir, fs_info->generation) || - btrfs_inode_in_log(new_dir, fs_info->generation) || - btrfs_inode_in_log(old_inode, fs_info->generation) || + if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) || + btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) || + btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || (new_inode && -btrfs_inode_in_log(new_inode, fs_info->generation))) +btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) btrfs_set_log_full_commit(fs_info, trans); if (root_log_pinned) { @@ -9959,11 +9959,11 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, * allow the tasks to sync it. 
*/ if (ret && log_pinned) { - if (btrfs_inode_in_log(old_dir, fs_info->generation) || - btrfs_inode_in_log(new_dir, fs_info->generation) || - btrfs_inode_in_log(old_inode, fs_info->generation) || + if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) || + btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) || + btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || (new_inode && -btrfs_inode_in_log(new_inode, fs_info->generation))) +btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) btrfs_set_log_full_commit(fs_info, trans); btrfs_end_log_trans(root); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 581d31171683..37adad5dabd6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5237,7 +5237,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, goto next_dir_inode; } - if (btrfs_inode_in_log(di_inode, trans->transid)) { + if
[PATCHv2 20/24] btrfs: Make __add_inode_ref take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/tree-log.c | 26 -- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 35434d686653..d919cd4252ba 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -991,7 +991,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_root *log_root, - struct inode *dir, struct inode *inode, + struct btrfs_inode *dir, struct btrfs_inode *inode, struct extent_buffer *eb, u64 inode_objectid, u64 parent_objectid, u64 ref_index, char *name, int namelen, @@ -1047,12 +1047,11 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, parent_objectid, victim_name, victim_name_len)) { - inc_nlink(inode); + inc_nlink(>vfs_inode); btrfs_release_path(path); - ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), -BTRFS_I(inode), victim_name, -victim_name_len); + ret = btrfs_unlink_inode(trans, root, dir, inode, + victim_name, victim_name_len); kfree(victim_name); if (ret) return ret; @@ -1114,15 +1113,14 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, parent_objectid, victim_name, victim_name_len)) { ret = -ENOENT; - victim_parent = read_one_inode(root, - parent_objectid); + victim_parent = read_one_inode(root, parent_objectid); if (victim_parent) { - inc_nlink(inode); + inc_nlink(>vfs_inode); btrfs_release_path(path); ret = btrfs_unlink_inode(trans, root, BTRFS_I(victim_parent), -BTRFS_I(inode), +inode, victim_name, victim_name_len); if (!ret) @@ -1148,20 +1146,20 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); /* look for a conflicting sequence number */ - di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(BTRFS_I(dir)), + di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), ref_index, name, namelen, 0); if (di && !IS_ERR(di)) { - ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), di); + 
ret = drop_one_dir_item(trans, root, path, dir, di); if (ret) return ret; } btrfs_release_path(path); /* look for a conflicing name */ - di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(BTRFS_I(dir)), + di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), name, namelen, 0); if (di && !IS_ERR(di)) { - ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), di); + ret = drop_one_dir_item(trans, root, path, dir, di); if (ret) return ret; } @@ -1307,7 +1305,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, if (!search_done) { ret = __add_inode_ref(trans, root, path, log, - dir, inode, eb, + BTRFS_I(dir), BTRFS_I(inode), eb, inode_objectid, parent_objectid,
[PATCHv2 05/24] btrfs: Make btrfs_log_new_name take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/inode.c| 8 fs/btrfs/tree-log.c | 18 -- fs/btrfs/tree-log.h | 2 +- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9442c80fe551..41b1e2ed63b4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6600,7 +6600,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, goto fail; } d_instantiate(dentry, inode); - btrfs_log_new_name(trans, inode, NULL, parent); + btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent); } btrfs_balance_delayed_items(fs_info); @@ -9660,13 +9660,13 @@ static int btrfs_rename_exchange(struct inode *old_dir, if (root_log_pinned) { parent = new_dentry->d_parent; - btrfs_log_new_name(trans, old_inode, old_dir, parent); + btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), parent); btrfs_end_log_trans(root); root_log_pinned = false; } if (dest_log_pinned) { parent = old_dentry->d_parent; - btrfs_log_new_name(trans, new_inode, new_dir, parent); + btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir), parent); btrfs_end_log_trans(dest); dest_log_pinned = false; } @@ -9932,7 +9932,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (log_pinned) { struct dentry *parent = new_dentry->d_parent; - btrfs_log_new_name(trans, old_inode, old_dir, parent); + btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), parent); btrfs_end_log_trans(root); log_pinned = false; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 37adad5dabd6..df822908f2be 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5809,30 +5809,28 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, * full transaction commit is required. 
*/ int btrfs_log_new_name(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *old_dir, + struct btrfs_inode *inode, struct btrfs_inode *old_dir, struct dentry *parent) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root * root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root * root = inode->root; /* * this will force the logging code to walk the dentry chain * up for the file */ - if (S_ISREG(inode->i_mode)) - BTRFS_I(inode)->last_unlink_trans = trans->transid; + if (S_ISREG(inode->vfs_inode.i_mode)) + inode->last_unlink_trans = trans->transid; /* * if this inode hasn't been logged and directory we're renaming it * from hasn't been logged, we don't need to log it */ - if (BTRFS_I(inode)->logged_trans <= - fs_info->last_trans_committed && - (!old_dir || BTRFS_I(old_dir)->logged_trans <= - fs_info->last_trans_committed)) + if (inode->logged_trans <= fs_info->last_trans_committed && + (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed)) return 0; - return btrfs_log_inode_parent(trans, root, inode, parent, 0, + return btrfs_log_inode_parent(trans, root, >vfs_inode, parent, 0, LLONG_MAX, 1, NULL); } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index e08ce78b2ad4..2bcbac7efa9c 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -85,6 +85,6 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, struct btrfs_inode *dir); int btrfs_log_new_name(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *old_dir, + struct btrfs_inode *inode, struct btrfs_inode *old_dir, struct dentry *parent); #endif -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 24/24] btrfs: Make count_inode_refs take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/tree-log.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 47e4f3610348..a16da4a3ab63 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1402,7 +1402,7 @@ static int count_inode_extrefs(struct btrfs_root *root, } static int count_inode_refs(struct btrfs_root *root, - struct inode *inode, struct btrfs_path *path) + struct btrfs_inode *inode, struct btrfs_path *path) { int ret; struct btrfs_key key; @@ -1410,7 +1410,7 @@ static int count_inode_refs(struct btrfs_root *root, unsigned long ptr; unsigned long ptr_end; int name_len; - u64 ino = btrfs_ino(BTRFS_I(inode)); + u64 ino = btrfs_ino(inode); key.objectid = ino; key.type = BTRFS_INODE_REF_KEY; @@ -1481,7 +1481,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = count_inode_refs(root, inode, path); + ret = count_inode_refs(root, BTRFS_I(inode), path); if (ret < 0) goto out; -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 21/24] btrfs: Make log_inode_item take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/tree-log.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d919cd4252ba..1348ab5e3229 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3592,19 +3592,18 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, static int log_inode_item(struct btrfs_trans_handle *trans, struct btrfs_root *log, struct btrfs_path *path, - struct inode *inode) + struct btrfs_inode *inode) { struct btrfs_inode_item *inode_item; int ret; ret = btrfs_insert_empty_item(trans, log, path, - &BTRFS_I(inode)->location, - sizeof(*inode_item)); + &inode->location, sizeof(*inode_item)); if (ret && ret != -EEXIST) return ret; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); - fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0); + fill_inode_item(trans, path->nodes[0], inode_item, &inode->vfs_inode, 0, 0); btrfs_release_path(path); return 0; } @@ -4930,7 +4929,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, btrfs_release_path(path); btrfs_release_path(dst_path); if (need_log_inode_item) { - err = log_inode_item(trans, log, dst_path, inode); + err = log_inode_item(trans, log, dst_path, BTRFS_I(inode)); if (err) goto out_unlock; } -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 07/24] btrfs: Make btrfs_del_inode_ref take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/inode.c | 2 +- fs/btrfs/tree-log.c | 10 +++++----- fs/btrfs/tree-log.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ebfeabafe1b1..e86b08eabb82 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4068,7 +4068,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, } ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, -inode, dir_ino); +BTRFS_I(inode), dir_ino); if (ret != 0 && ret != -ENOENT) { btrfs_abort_transaction(trans, ret); goto err; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index caa8d886b4ae..a7705173150e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3192,25 +3192,25 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - struct inode *inode, u64 dirid) + struct btrfs_inode *inode, u64 dirid) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *log; u64 index; int ret; - if (BTRFS_I(inode)->logged_trans < trans->transid) + if (inode->logged_trans < trans->transid) return 0; ret = join_running_log_trans(root); if (ret) return 0; log = root->log_root; - mutex_lock(&BTRFS_I(inode)->log_mutex); + mutex_lock(&inode->log_mutex); - ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(BTRFS_I(inode)), + ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), dirid, &index); - mutex_unlock(&BTRFS_I(inode)->log_mutex); + mutex_unlock(&inode->log_mutex); if (ret == -ENOSPC) { btrfs_set_log_full_commit(fs_info, trans); ret = 0; diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 6c2b316b28e0..bc50f128c6be 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -76,7 +76,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int 
name_len, - struct inode *inode, u64 dirid); + struct btrfs_inode *inode, u64 dirid); void btrfs_end_log_trans(struct btrfs_root *root); int btrfs_pin_log_trans(struct btrfs_root *root); void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 16/24] btrfs: Make log_directory_changes take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/tree-log.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8d7197a0eceb..38cda7869bf9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3450,7 +3450,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, * key logged by this transaction. */ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_path *dst_path, struct btrfs_log_ctx *ctx) @@ -3464,9 +3464,8 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, min_key = 0; max_key = 0; while (1) { - ret = log_dir_items(trans, root, BTRFS_I(inode), path, - dst_path, key_type, ctx, min_key, - &max_key); + ret = log_dir_items(trans, root, inode, path, dst_path, key_type, + ctx, min_key, &max_key); if (ret) return ret; if (max_key == (u64)-1) @@ -4977,7 +4976,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { - ret = log_directory_changes(trans, root, inode, path, dst_path, + ret = log_directory_changes(trans, root, BTRFS_I(inode), path, dst_path, ctx); if (ret) { err = ret; -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 19/24] btrfs: Make drop_one_dir_item take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/tree-log.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index b2c0a30811f6..35434d686653 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -843,7 +843,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - struct inode *dir, + struct btrfs_inode *dir, struct btrfs_dir_item *di) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -875,7 +875,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, if (ret) goto out; - ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), BTRFS_I(inode), name, name_len); + ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name, name_len); if (ret) goto out; else @@ -1151,7 +1151,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(BTRFS_I(dir)), ref_index, name, namelen, 0); if (di && !IS_ERR(di)) { - ret = drop_one_dir_item(trans, root, path, dir, di); + ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), di); if (ret) return ret; } @@ -1161,7 +1161,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(BTRFS_I(dir)), name, namelen, 0); if (di && !IS_ERR(di)) { - ret = drop_one_dir_item(trans, root, path, dir, di); + ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), di); if (ret) return ret; } @@ -1769,7 +1769,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, if (!exists) goto out; - ret = drop_one_dir_item(trans, root, path, dir, dst_di); + ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), dst_di); if (ret) goto out; -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to 
majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 01/24] btrfs: Make btrfs_must_commit_transaction take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/tree-log.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index b814cd7bbe70..a2a822a993af 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5021,13 +5021,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * we logged the inode or it might have also done the unlink). */ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_fs_info *fs_info = inode->root->fs_info; bool ret = false; - mutex_lock(&BTRFS_I(inode)->log_mutex); - if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) { + mutex_lock(&inode->log_mutex); + if (inode->last_unlink_trans > fs_info->last_trans_committed) { /* * Make sure any commits to the log are forced to be full * commits. @@ -5035,7 +5035,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_log_full_commit(fs_info, trans); ret = true; } - mutex_unlock(&BTRFS_I(inode)->log_mutex); + mutex_unlock(&inode->log_mutex); return ret; } @@ -5084,7 +5084,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, BTRFS_I(inode)->logged_trans = trans->transid; smp_mb(); - if (btrfs_must_commit_transaction(trans, inode)) { + if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) { ret = 1; break; } @@ -5094,7 +5094,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, if (IS_ROOT(parent)) { inode = d_inode(parent); - if (btrfs_must_commit_transaction(trans, inode)) + if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) ret = 1; break; } @@ -5248,7 +5248,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, ret = btrfs_log_inode(trans, root, di_inode, log_mode, 0, LLONG_MAX, ctx); if (!ret && - btrfs_must_commit_transaction(trans, di_inode)) + 
btrfs_must_commit_transaction(trans, BTRFS_I(di_inode))) ret = 1; iput(di_inode); if (ret) @@ -5368,7 +5368,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, ret = btrfs_log_inode(trans, root, dir_inode, LOG_INODE_ALL, 0, LLONG_MAX, ctx); if (!ret && - btrfs_must_commit_transaction(trans, dir_inode)) + btrfs_must_commit_transaction(trans, BTRFS_I(dir_inode))) ret = 1; if (!ret && ctx && ctx->log_new_dentries) ret = log_new_dir_dentries(trans, root, -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 09/24] btrfs: Make btrfs_check_ref_name_override take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/tree-log.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 20718cfebf89..7669e95be423 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4495,7 +4495,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, static int btrfs_check_ref_name_override(struct extent_buffer *eb, const int slot, const struct btrfs_key *key, -struct inode *inode, +struct btrfs_inode *inode, u64 *other_ino) { int ret; @@ -4551,9 +4551,8 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, } read_extent_buffer(eb, name, name_ptr, this_name_len); - di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root, - search_path, parent, - name, this_name_len, 0); + di = btrfs_lookup_dir_item(NULL, inode->root, search_path, + parent, name, this_name_len, 0); if (di && !IS_ERR(di)) { struct btrfs_key di_key; @@ -4769,7 +4768,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ret = btrfs_check_ref_name_override(path->nodes[0], path->slots[0], - &min_key, inode, + &min_key, BTRFS_I(inode), &other_ino); if (ret < 0) { err = ret; -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 02/24] btrfs: Make btrfs_record_unlink_dir take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/inode.c| 8 fs/btrfs/tree-log.c | 18 +- fs/btrfs/tree-log.h | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d49c3b78e2fb..a8374f1d8c61 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4142,7 +4142,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_record_unlink_dir(trans, dir, d_inode(dentry), 0); + btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), 0); ret = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), dentry->d_name.name, dentry->d_name.len); @@ -9593,8 +9593,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, new_inode->i_ctime = ctime; if (old_dentry->d_parent != new_dentry->d_parent) { - btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); - btrfs_record_unlink_dir(trans, new_dir, new_inode, 1); + btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), 1); + btrfs_record_unlink_dir(trans, BTRFS_I(new_dir), BTRFS_I(new_inode), 1); } /* src is a subvolume */ @@ -9873,7 +9873,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_ctime = current_time(old_dir); if (old_dentry->d_parent != new_dentry->d_parent) - btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); + btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), 1); if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a2a822a993af..6f9a3beb7050 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5730,7 +5730,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) * inodes, etc) are done. 
*/ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, -struct inode *dir, struct inode *inode, +struct btrfs_inode *dir, struct btrfs_inode *inode, int for_rename) { /* @@ -5743,23 +5743,23 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, * into the file. When the file is logged we check it and * don't log the parents if the file is fully on disk. */ - mutex_lock(&BTRFS_I(inode)->log_mutex); - BTRFS_I(inode)->last_unlink_trans = trans->transid; - mutex_unlock(&BTRFS_I(inode)->log_mutex); + mutex_lock(&inode->log_mutex); + inode->last_unlink_trans = trans->transid; + mutex_unlock(&inode->log_mutex); /* * if this directory was already logged any new * names for this file/dir will get recorded */ smp_mb(); - if (BTRFS_I(dir)->logged_trans == trans->transid) + if (dir->logged_trans == trans->transid) return; /* * if the inode we're about to unlink was logged, * the log will be properly updated for any new names */ - if (BTRFS_I(inode)->logged_trans == trans->transid) + if (inode->logged_trans == trans->transid) return; /* @@ -5776,9 +5776,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, return; record: - mutex_lock(&BTRFS_I(dir)->log_mutex); - BTRFS_I(dir)->last_unlink_trans = trans->transid; - mutex_unlock(&BTRFS_I(dir)->log_mutex); + mutex_lock(&dir->log_mutex); + dir->last_unlink_trans = trans->transid; + mutex_unlock(&dir->log_mutex); } /* diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index ab858e31ccbc..69702eef9603 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -80,7 +80,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, void btrfs_end_log_trans(struct btrfs_root *root); int btrfs_pin_log_trans(struct btrfs_root *root); void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, -struct inode *dir, struct inode *inode, +struct btrfs_inode *dir, struct btrfs_inode *inode, int for_rename); void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, struct inode *dir); -- 2.7.4 -- To unsubscribe from this list: 
send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 10/24] btrfs: Make copy_items take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/tree-log.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7669e95be423..12872bf492bd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3613,16 +3613,16 @@ static int log_inode_item(struct btrfs_trans_handle *trans, } static noinline int copy_items(struct btrfs_trans_handle *trans, - struct inode *inode, + struct btrfs_inode *inode, struct btrfs_path *dst_path, struct btrfs_path *src_path, u64 *last_extent, int start_slot, int nr, int inode_only, u64 logged_isize) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); unsigned long src_offset; unsigned long dst_offset; - struct btrfs_root *log = BTRFS_I(inode)->root->log_root; + struct btrfs_root *log = inode->root->log_root; struct btrfs_file_extent_item *extent; struct btrfs_inode_item *inode_item; struct extent_buffer *src = src_path->nodes[0]; @@ -3633,7 +3633,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, char *ins_data; int i; struct list_head ordered_sums; - int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; + int skip_csum = inode->flags & BTRFS_INODE_NODATASUM; bool has_extents = false; bool need_find_last_extent = true; bool done = false; @@ -3675,7 +3675,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, dst_path->slots[0], struct btrfs_inode_item); fill_inode_item(trans, dst_path->nodes[0], inode_item, - inode, inode_only == LOG_INODE_EXISTS, + &inode->vfs_inode, inode_only == LOG_INODE_EXISTS, logged_isize); } else { copy_extent_buffer(dst_path->nodes[0], src, dst_offset, @@ -3783,7 +3783,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (need_find_last_extent) { u64 len; - ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path); + ret = btrfs_prev_leaf(inode->root, src_path); if (ret < 0) return ret; if (ret) @@ -3792,7 +3792,7 @@ static 
noinline int copy_items(struct btrfs_trans_handle *trans, src_path->slots[0]--; src = src_path->nodes[0]; btrfs_item_key_to_cpu(src, , src_path->slots[0]); - if (key.objectid != btrfs_ino(BTRFS_I(inode)) || + if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) goto fill_holes; extent = btrfs_item_ptr(src, src_path->slots[0], @@ -3825,8 +3825,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (need_find_last_extent) { /* btrfs_prev_leaf could return 1 without releasing the path */ btrfs_release_path(src_path); - ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, _key, - src_path, 0, 0); + ret = btrfs_search_slot(NULL, inode->root, _key, src_path, 0, 0); if (ret < 0) return ret; ASSERT(ret == 0); @@ -3846,7 +3845,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, u64 extent_end; if (i >= btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path); + ret = btrfs_next_leaf(inode->root, src_path); if (ret < 0) return ret; ASSERT(ret == 0); @@ -3857,7 +3856,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(src, , i); if (!btrfs_comp_cpu_keys(, _key)) done = true; - if (key.objectid != btrfs_ino(BTRFS_I(inode)) || + if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) { i++; continue; @@ -3880,9 +3879,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, } offset = *last_extent; len = key.offset - *last_extent; - ret =
[PATCHv2 15/24] btrfs: Make log_dir_items take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/tree-log.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e293ae0e18d7..8d7197a0eceb 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3260,7 +3260,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, * to replay anything deleted before the fsync */ static noinline int log_dir_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_path *dst_path, int key_type, struct btrfs_log_ctx *ctx, @@ -3275,7 +3275,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, int nritems; u64 first_offset = min_offset; u64 last_offset = (u64)-1; - u64 ino = btrfs_ino(BTRFS_I(inode)); + u64 ino = btrfs_ino(inode); log = root->log_root; @@ -3464,7 +3464,7 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, min_key = 0; max_key = 0; while (1) { - ret = log_dir_items(trans, root, inode, path, + ret = log_dir_items(trans, root, BTRFS_I(inode), path, dst_path, key_type, ctx, min_key, &max_key); if (ret) -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 14/24] btrfs: Make btrfs_log_changed_extents take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/tree-log.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 0e061f91055e..e293ae0e18d7 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4053,7 +4053,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans, } static int log_one_extent(struct btrfs_trans_handle *trans, - struct inode *inode, struct btrfs_root *root, + struct btrfs_inode *inode, struct btrfs_root *root, const struct extent_map *em, struct btrfs_path *path, const struct list_head *logged_list, @@ -4070,7 +4070,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, int extent_inserted = 0; bool ordered_io_err = false; - ret = wait_ordered_extents(trans, inode, root, em, logged_list, + ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em, logged_list, &ordered_io_err); if (ret) return ret; @@ -4082,14 +4082,14 @@ static int log_one_extent(struct btrfs_trans_handle *trans, btrfs_init_map_token(&token); - ret = __btrfs_drop_extents(trans, log, inode, path, em->start, + ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start, em->start + em->len, NULL, 0, 1, sizeof(*fi), &extent_inserted); if (ret) return ret; if (!extent_inserted) { - key.objectid = btrfs_ino(BTRFS_I(inode)); + key.objectid = btrfs_ino(inode); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = em->start; @@ -4148,7 +4148,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, -struct inode *inode, +struct btrfs_inode *inode, struct btrfs_path *path, struct list_head *logged_list, struct btrfs_log_ctx *ctx, @@ -4157,14 +4157,14 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, { struct extent_map *em, *n; struct list_head extents; - struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; + struct extent_map_tree *tree = &inode->extent_tree; u64 test_gen; int ret = 0; int num = 0; 
INIT_LIST_HEAD(&extents); - down_write(&BTRFS_I(inode)->dio_sem); + down_write(&inode->dio_sem); write_lock(&tree->lock); test_gen = root->fs_info->last_trans_committed; @@ -4193,7 +4193,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, } list_sort(NULL, &extents, extent_cmp); - btrfs_get_logged_extents(BTRFS_I(inode), logged_list, start, end); + btrfs_get_logged_extents(inode, logged_list, start, end); /* * Some ordered extents started by fsync might have completed * before we could collect them into the list logged_list, which @@ -4204,7 +4204,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, * without writing to the log tree and the fsync must report the * file data write error and not commit the current transaction. */ - ret = filemap_check_errors(inode->i_mapping); + ret = filemap_check_errors(inode->vfs_inode.i_mapping); if (ret) ctx->io_err = ret; process: @@ -4233,7 +4233,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, } WARN_ON(!list_empty(&extents)); write_unlock(&tree->lock); - up_write(&BTRFS_I(inode)->dio_sem); + up_write(&inode->dio_sem); btrfs_release_path(path); return ret; @@ -4938,7 +4938,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, goto out_unlock; } if (fast_search) { - ret = btrfs_log_changed_extents(trans, root, inode, dst_path, + ret = btrfs_log_changed_extents(trans, root, BTRFS_I(inode), dst_path, &logged_list, ctx, start, end); if (ret) { err = ret; -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 13/24] btrfs: Make btrfs_get_logged_extents take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/ordered-data.c | 4 ++-- fs/btrfs/ordered-data.h | 2 +- fs/btrfs/tree-log.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 041c3326d109..7ae350a64c77 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -432,7 +432,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, } /* Needs to either be called under a log transaction or the log_mutex */ -void btrfs_get_logged_extents(struct inode *inode, +void btrfs_get_logged_extents(struct btrfs_inode *inode, struct list_head *logged_list, const loff_t start, const loff_t end) @@ -442,7 +442,7 @@ void btrfs_get_logged_extents(struct inode *inode, struct rb_node *n; struct rb_node *prev; - tree = &BTRFS_I(inode)->ordered_tree; + tree = &inode->ordered_tree; spin_lock_irq(&tree->lock); n = __tree_search(&tree->tree, end, &prev); if (!n) diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 5f2b0ca28705..b02b71d41d83 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -201,7 +201,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, const u64 range_start, const u64 range_len); int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, const u64 range_start, const u64 range_len); -void btrfs_get_logged_extents(struct inode *inode, +void btrfs_get_logged_extents(struct btrfs_inode *inode, struct list_head *logged_list, const loff_t start, const loff_t end); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9f2c42016825..0e061f91055e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4193,7 +4193,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, } list_sort(NULL, &extents, extent_cmp); - btrfs_get_logged_extents(inode, logged_list, start, end); + btrfs_get_logged_extents(BTRFS_I(inode), logged_list, start, end); /* * Some ordered extents started by fsync might have completed * before we could collect them into the list 
logged_list, which -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 08/24] btrfs: Make logged_inode_size take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/tree-log.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a7705173150e..20718cfebf89 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4241,13 +4241,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, return ret; } -static int logged_inode_size(struct btrfs_root *log, struct inode *inode, +static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, struct btrfs_path *path, u64 *size_ret) { struct btrfs_key key; int ret; - key.objectid = btrfs_ino(BTRFS_I(inode)); + key.objectid = btrfs_ino(inode); key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; @@ -4699,7 +4699,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * (zeroes), as if an expanding truncate happened, * instead of getting a file of 4Kb only. */ - err = logged_inode_size(log, inode, path, + err = logged_inode_size(log, BTRFS_I(inode), path, &logged_isize); if (err) goto out_unlock; -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 23/24] btrfs: Make count_inode_extrefs take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/tree-log.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8c110d0e16c3..47e4f3610348 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1358,14 +1358,14 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans, } static int count_inode_extrefs(struct btrfs_root *root, - struct inode *inode, struct btrfs_path *path) + struct btrfs_inode *inode, struct btrfs_path *path) { int ret = 0; int name_len; unsigned int nlink = 0; u32 item_size; u32 cur_offset = 0; - u64 inode_objectid = btrfs_ino(BTRFS_I(inode)); + u64 inode_objectid = btrfs_ino(inode); u64 offset = 0; unsigned long ptr; struct btrfs_inode_extref *extref; @@ -1487,7 +1487,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, nlink = ret; - ret = count_inode_extrefs(root, inode, path); + ret = count_inode_extrefs(root, BTRFS_I(inode), path); if (ret < 0) goto out; -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 12/24] btrfs: Make btrfs_log_trailing_hole take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/tree-log.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1301c517c2f0..9f2c42016825 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4372,7 +4372,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, */ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *inode, + struct btrfs_inode *inode, struct btrfs_path *path) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -4382,8 +4382,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, u64 hole_size; struct extent_buffer *leaf; struct btrfs_root *log = root->log_root; - const u64 ino = btrfs_ino(BTRFS_I(inode)); - const u64 i_size = i_size_read(inode); + const u64 ino = btrfs_ino(inode); + const u64 i_size = i_size_read(&inode->vfs_inode); if (!btrfs_fs_incompat(fs_info, NO_HOLES)) return 0; @@ -4925,7 +4925,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_trailing_hole(trans, root, inode, path); + err = btrfs_log_trailing_hole(trans, root, BTRFS_I(inode), path); if (err) goto out_unlock; } -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 18/24] btrfs: Make btrfs_unlink_inode take btrfs_inode
Signed-off-by: Nikolay Borisov--- fs/btrfs/ctree.h| 2 +- fs/btrfs/inode.c| 58 ++--- fs/btrfs/tree-log.c | 14 ++--- 3 files changed, 36 insertions(+), 38 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6a823719b6c5..06d5e6388b4c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3119,7 +3119,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *dir, struct inode *inode, + struct btrfs_inode *dir, struct btrfs_inode *inode, const char *name, int name_len); int btrfs_add_link(struct btrfs_trans_handle *trans, struct inode *parent_inode, struct inode *inode, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e86b08eabb82..ac433c43d242 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3996,7 +3996,7 @@ noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, */ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *dir, struct inode *inode, + struct btrfs_inode *dir, struct btrfs_inode *inode, const char *name, int name_len) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -4006,8 +4006,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di; struct btrfs_key key; u64 index; - u64 ino = btrfs_ino(BTRFS_I(inode)); - u64 dir_ino = btrfs_ino(BTRFS_I(dir)); + u64 ino = btrfs_ino(inode); + u64 dir_ino = btrfs_ino(dir); path = btrfs_alloc_path(); if (!path) { @@ -4043,10 +4043,10 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, * that we delay to delete it, and just do this deletion when * we update the inode item. 
*/ - if (BTRFS_I(inode)->dir_index) { - ret = btrfs_delayed_delete_inode_ref(BTRFS_I(inode)); + if (inode->dir_index) { + ret = btrfs_delayed_delete_inode_ref(inode); if (!ret) { - index = BTRFS_I(inode)->dir_index; + index = inode->dir_index; goto skip_backref; } } @@ -4061,21 +4061,19 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, goto err; } skip_backref: - ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index); + ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index); if (ret) { btrfs_abort_transaction(trans, ret); goto err; } - ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, -BTRFS_I(inode), dir_ino); + ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, dir_ino); if (ret != 0 && ret != -ENOENT) { btrfs_abort_transaction(trans, ret); goto err; } - ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, - BTRFS_I(dir), index); + ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, index); if (ret == -ENOENT) ret = 0; else if (ret) @@ -4085,26 +4083,26 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, if (ret) goto out; - btrfs_i_size_write(dir, dir->i_size - name_len * 2); - inode_inc_iversion(inode); - inode_inc_iversion(dir); - inode->i_ctime = dir->i_mtime = - dir->i_ctime = current_time(inode); - ret = btrfs_update_inode(trans, root, dir); + btrfs_i_size_write(>vfs_inode, dir->vfs_inode.i_size - name_len * 2); + inode_inc_iversion(>vfs_inode); + inode_inc_iversion(>vfs_inode); + inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime = + dir->vfs_inode.i_ctime = current_time(>vfs_inode); + ret = btrfs_update_inode(trans, root, >vfs_inode); out: return ret; } int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *dir, struct inode *inode, + struct btrfs_inode *dir, struct btrfs_inode *inode, const char *name, int name_len) { int ret; ret = __btrfs_unlink_inode(trans, root, dir, inode, name, 
name_len); if (!ret) { - drop_nlink(inode); - ret = btrfs_update_inode(trans, root,
[PATCHv2 00/24] tree-log inode vs btrfs_inode cleanups
So here is a new set of patches cleaning up tree-log function w.r.t inode vs btrfs_inode. There are still some which remain but I didn't find compelling arguments to cleaning them up, so I've left them unchanged. This time there is some size shrinkage: text data bss dec hex filename 2530598 174661 28288 2733547 29b5eb fs/btrfs/btrfs.ko - upstream master text data bss dec hex filename 2530774 174661 28288 2733723 29b69b fs/btrfs/btrfs.ko - before tree-log cleanup text data bss dec hex filename 2530163 174661 28288 2733112 29b438 fs/btrfs/btrfs.ko - both series applied So the net result of the 2 series is 435 bytes and I assume there will be further reduction in size once further cleanups are made Changes since v1: * Rebased all patches to latest master Nikolay Borisov (24): btrfs: Make btrfs_must_commit_transaction take btrfs_inode btrfs: Make btrfs_record_unlink_dir take btrfs_inode btrfs: Make btrfs_record_snapshot_destroy take btrfs_inode btrfs: Make btrfs_inode_in_log take btrfs_inode btrfs: Make btrfs_log_new_name take btrfs_inode btrfs: Make btrfs_del_dir_entries_in_log take btrfs_inode btrfs: Make btrfs_del_inode_ref take btrfs_inode btrfs: Make logged_inode_size take btrfs_inode btrfs: Make btrfs_check_ref_name_override take btrfs_inode btrfs: Make copy_items take btrfs_inode btrfs: Make btrfs_log_all_xattrs take btrfs_inode btrfs: Make btrfs_log_trailing_hole take btrfs_inode btrfs: Make btrfs_get_logged_extents take btrfs_inode btrfs: Make btrfs_log_changed_extents take btrfs_inode btrfs: Make log_dir_items take btrfs_inode btrfs: Make log_directory_changes take btrfs_inode btrfs: Make log_new_dir_dentries take btrfs_inode btrfs: Make btrfs_unlink_inode take btrfs_inode btrfs: Make drop_one_dir_item take btrfs_inode btrfs: Make __add_inode_ref take btrfs_inode btrfs: Make log_inode_item take btrfs_inode btrfs: Make btrfs_log_inode take btrfs_inode btrfs: Make count_inode_extrefs take btrfs_inode btrfs: Make count_inode_refs take btrfs_inode 
fs/btrfs/btrfs_inode.h | 16 ++- fs/btrfs/ctree.h| 2 +- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c| 90 --- fs/btrfs/ioctl.c| 2 +- fs/btrfs/ordered-data.c | 4 +- fs/btrfs/ordered-data.h | 2 +- fs/btrfs/tree-log.c | 288 +++- fs/btrfs/tree-log.h | 10 +- 9 files changed, 201 insertions(+), 215 deletions(-) -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 03/24] btrfs: Make btrfs_record_snapshot_destroy take btrfs_inode
Signed-off-by: Nikolay Borisov --- fs/btrfs/ioctl.c | 2 +- fs/btrfs/tree-log.c | 8 ++++---- fs/btrfs/tree-log.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e8e1f5f5f93a..7d1b5378de49 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2497,7 +2497,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; - btrfs_record_snapshot_destroy(trans, dir); + btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); ret = btrfs_unlink_subvol(trans, root, dir, dest->root_key.objectid, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6f9a3beb7050..581d31171683 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5794,11 +5794,11 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, * parent root and tree of tree roots trees, etc) are done. */ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, - struct inode *dir) + struct btrfs_inode *dir) { - mutex_lock(&BTRFS_I(dir)->log_mutex); - BTRFS_I(dir)->last_unlink_trans = trans->transid; - mutex_unlock(&BTRFS_I(dir)->log_mutex); + mutex_lock(&dir->log_mutex); + dir->last_unlink_trans = trans->transid; + mutex_unlock(&dir->log_mutex); } /* diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 69702eef9603..e08ce78b2ad4 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -83,7 +83,7 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, int for_rename); void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, - struct inode *dir); + struct btrfs_inode *dir); int btrfs_log_new_name(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *old_dir, struct dentry *parent); -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Uncorrectable errors with RAID1
On Tue, Jan 17, 2017 at 1:25 PM, Christoph Groth wrote: > Goldwyn Rodrigues wrote: >> >> On 01/17/2017 02:44 AM, Christoph Groth wrote: >>> >>> Goldwyn Rodrigues wrote: >>> Would you be able to upload a btrfs-image for me to examine. This is a core ctree error where most probably item size is incorrectly registered. >>> >>> >>> Sure, I can do that. I'd like to use the -s option, will this be fine? >> >> >> Yes, I think that should be fine. > > > Unfortunately, giving -s causes btrfs-image to segfault. I tried both > btrfs-progs 4.7.3 and 4.4. I also tried different compression levels. > > Without -s it works, but since this file system contains the complete > digital life of our family, I would rather not share even the file names. > > Any ideas on what could be done? If you need help to debug the problem with > btrfs-image, please tell me what I should do. I can keep the broken file > system around until an image can be created at some later time. Try 4.9, or even 4.8.5, tons of bugs have been fixed since 4.7.3 although I don't know offhand if this particular bug is fixed. I did recently do a btrfs-image with btrfs-progs v4.9 with -s and did not get a segfault. -- Chris Murphy -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"
On Jan 17, 2017, at 8:59 AM, Theodore Ts'o wrote: > > On Tue, Jan 17, 2017 at 04:18:17PM +0100, Michal Hocko wrote: >> >> OK, so I've been staring into the code and AFAIU current->journal_info >> can contain my stored information. I could either hijack part of the >> word as the ref counting is only consuming low 12b. But that looks too >> ugly to live. Or I can allocate some placeholder. > > Yeah, I was looking at something similar. Can you guarantee that the > context will only take one or two bits? (Looks like it only needs one > bit ATM, even though at the moment you're storing the whole GFP mask, > correct?) > >> But before going to play with that I am really wondering whether we need >> all this with no journal at all. AFAIU what Jack told me it is the >> journal lock(s) which is the biggest problem from the reclaim recursion >> point of view. What would cause a deadlock in no journal mode? > > We still have the original problem for why we need GFP_NOFS even in > ext2. If we are in a writeback path, and we need to allocate memory, > we don't want to recurse back into the file system's writeback path. > Certainly not for the same inode, and while we could make it work if > the mm was writing back another inode, or another superblock, there > are also stack depth considerations that would make this be a bad > idea. So we do need to be able to assert GFP_NOFS even in no journal > mode, and for any file system including ext2, for that matter. > > Because of the fact that we're going to have to play games with > current->journal_info, maybe this is something that I should take > responsibility for, and to go through the the ext4 tree after the main > patch series go through? Maybe you could use xfs and ext2 as sample > (simple) implementations? > > My only ask is that the memalloc nofs context be a well defined N > bits, where N < 16, and I'll find some place to put them (probably > journal_info). 
I think Dave was suggesting that the NOFS context allow a pointer to an arbitrary struct, so that it is possible to dereference this in the filesystem itself to determine if the recursion is safe or not. That way, ext2 could store an inode pointer (if that is what it cares about) and verify that writeback is not recursing on the same inode, and XFS can store something different. It would also need to store some additional info (e.g. fstype or superblock pointer) so that it can determine how to interpret the NOFS context pointer. I think it makes sense to add a couple of void * pointers to the task struct along with journal_info and leave it up to the filesystem to determine how to use them. Cheers, Andreas signature.asc Description: Message signed with OpenPGP using GPGMail
Re: Uncorrectable errors with RAID1
Goldwyn Rodrigues wrote: On 01/17/2017 02:44 AM, Christoph Groth wrote: Goldwyn Rodrigues wrote: Would you be able to upload a btrfs-image for me to examine. This is a core ctree error where most probably item size is incorrectly registered. Sure, I can do that. I'd like to use the -s option, will this be fine? Yes, I think that should be fine. Unfortunately, giving -s causes btrfs-image to segfault. I tried both btrfs-progs 4.7.3 and 4.4. I also tried different compression levels. Without -s it works, but since this file system contains the complete digital life of our family, I would rather not share even the file names. Any ideas on what could be done? If you need help to debug the problem with btrfs-image, please tell me what I should do. I can keep the broken file system around until an image can be created at some later time. signature.asc Description: PGP signature
Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"
On Tue 17-01-17 17:16:19, Michal Hocko wrote: > > > But before going to play with that I am really wondering whether we need > > > all this with no journal at all. AFAIU what Jack told me it is the > > > journal lock(s) which is the biggest problem from the reclaim recursion > > > point of view. What would cause a deadlock in no journal mode? > > > > We still have the original problem for why we need GFP_NOFS even in > > ext2. If we are in a writeback path, and we need to allocate memory, > > we don't want to recurse back into the file system's writeback path. > > But we do not enter the writeback path from the direct reclaim. Or do > you mean something other than pageout()'s mapping->a_ops->writepage? > There is only try_to_release_page where we get back to the filesystems > but I do not see any NOFS protection in ext4_releasepage. Maybe to expand a bit: These days, direct reclaim can call ->releasepage() callback, ->evict_inode() callback (and only for inodes with i_nlink > 0), shrinkers. That's it. So the recursion possibilities are rather more limited than they used to be several years ago and we likely do not need as much GFP_NOFS protection as we used to. Honza -- Jan Kara SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/24] tree-log inode vs btrfs_inode cleanups
On Thu, Jan 12, 2017 at 04:00:26PM +0200, Nikolay Borisov wrote: > So here is a new set of patches cleaning up tree-log function > w.r.t inode vs btrfs_inode. There are still some which remain > but I didn't find compelling arguments to cleaning them up, so > I've left them unchanged. This time there are some size shrinkage: > >text data bss dec hex filename >2530598 174661 28288 2733547 29b5eb fs/btrfs/btrfs.ko - upstream > master > > text data bss dec hex filename > 2530774 174661 28288 2733723 29b69b fs/btrfs/btrfs.ko - before > tree-log cleanup > > text data bss dec hex filename > 2530163 174661 28288 2733112 29b438 fs/btrfs/btrfs.ko - both series > applied > > So the net result of the 2 series is 435 bytes and I assume there > will be further reduction in size once further cleanups are made Thanks. I was about to apply the series but patch 06/24 fails to apply on anything that I could use (master, cmason's integration or the cleanups-next branch). Can you please refresh it on top of master? The conflict looks like the patch tries to apply the same change twice to btrfs_del_dir_entries_in_log, so it would be better if you check. Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"
On Tue 17-01-17 10:59:16, Theodore Ts'o wrote: > On Tue, Jan 17, 2017 at 04:18:17PM +0100, Michal Hocko wrote: > > > > OK, so I've been staring into the code and AFAIU current->journal_info > > can contain my stored information. I could either hijack part of the > > word as the ref counting is only consuming low 12b. But that looks too > > ugly to live. Or I can allocate some placeholder. > > Yeah, I was looking at something similar. Can you guarantee that the > context will only take one or two bits? (Looks like it only needs one > bit ATM, even though at the moment you're storing the whole GFP mask, > correct?) No, I am just storing PF_MEMALLOC_NO{FS,IO} but I assume further changes might want to pull in more changes into the scope context. > > But before going to play with that I am really wondering whether we need > > all this with no journal at all. AFAIU what Jack told me it is the > > journal lock(s) which is the biggest problem from the reclaim recursion > > point of view. What would cause a deadlock in no journal mode? > > We still have the original problem for why we need GFP_NOFS even in > ext2. If we are in a writeback path, and we need to allocate memory, > we don't want to recurse back into the file system's writeback path. But we do not enter the writeback path from the direct reclaim. Or do you mean something other than pageout()'s mapping->a_ops->writepage? There is only try_to_release_page where we get back to the filesystems but I do not see any NOFS protection in ext4_releasepage. > Certainly not for the same inode, and while we could make it work if > the mm was writing back another inode, or another superblock, there > are also stack depth considerations that would make this be a bad > idea. So we do need to be able to assert GFP_NOFS even in no journal > mode, and for any file system including ext2, for that matter. 
> > Because of the fact that we're going to have to play games with > current->journal_info, maybe this is something that I should take > responsibility for, and to go through the the ext4 tree after the main > patch series go through? How do you see a possibility that we would handle nojournal mode on top of "[PATCH 5/8] jbd2: mark the transaction context with the scope GFP_NOFS context" in a separate patch? But anyway, I agree that we should go with the API sooner rather than later. > Maybe you could use xfs and ext2 as sample > (simple) implementations? > > My only ask is that the memalloc nofs context be a well defined N > bits, where N < 16, and I'll find some place to put them (probably > journal_info). I am pretty sure that we won't need more than a bit or two in a foreseeable future (I can think of GFP_NOWAIT being one candidate). -- Michal Hocko SUSE Labs -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [LSF/MM ATTEND] blk-mq I/O scheduling, Btrfs, VFS topics
On 01/13/2017 08:24 PM, Omar Sandoval wrote: > Hi, > > I'd like to attend LSF/MM again this year to discuss topics in blk-mq, > Btrfs, and the VFS. > > I've been working on the blk-mq I/O scheduling framework [1] with Jens. > Once that is finalized, the next step is a proper multiqueue scheduler. > I've put together a prototype with a few basic ideas [2], but there's > still work to be done. It's hard to say where exactly we'll be in time > for LSF/MM since things are moving pretty fast, but I'm sure there will > be something to discuss. I'd be interested in joining that session too, for obvious reasons. -- Jens Axboe -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"
On Tue, Jan 17, 2017 at 04:18:17PM +0100, Michal Hocko wrote: > > OK, so I've been staring into the code and AFAIU current->journal_info > can contain my stored information. I could either hijack part of the > word as the ref counting is only consuming low 12b. But that looks too > ugly to live. Or I can allocate some placeholder. Yeah, I was looking at something similar. Can you guarantee that the context will only take one or two bits? (Looks like it only needs one bit ATM, even though at the moment you're storing the whole GFP mask, correct?) > But before going to play with that I am really wondering whether we need > all this with no journal at all. AFAIU what Jack told me it is the > journal lock(s) which is the biggest problem from the reclaim recursion > point of view. What would cause a deadlock in no journal mode? We still have the original problem for why we need GFP_NOFS even in ext2. If we are in a writeback path, and we need to allocate memory, we don't want to recurse back into the file system's writeback path. Certainly not for the same inode, and while we could make it work if the mm was writing back another inode, or another superblock, there are also stack depth considerations that would make this be a bad idea. So we do need to be able to assert GFP_NOFS even in no journal mode, and for any file system including ext2, for that matter. Because of the fact that we're going to have to play games with current->journal_info, maybe this is something that I should take responsibility for, and to go through the ext4 tree after the main patch series go through? Maybe you could use xfs and ext2 as sample (simple) implementations? My only ask is that the memalloc nofs context be a well defined N bits, where N < 16, and I'll find some place to put them (probably journal_info). 
Thanks, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] btrfs-progs: Corruption-framework: Include inode fields
On Thu, Jan 05, 2017 at 06:03:58PM +0100, Lakshmipathi.G wrote: > Signed-off-by: Lakshmipathi.G Applied, thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] btrfs-progs: cmds-check.c: supports inode isize fix in lowmem
On Mon, Jan 09, 2017 at 01:38:08PM +0800, Su Yue wrote: > Add a function 'repair_inode_isize' to support inode isize repair. Similar comments to this patch, missng path init and the error message level. > Signed-off-by: Su Yue> --- > cmds-check.c | 49 - > 1 file changed, 48 insertions(+), 1 deletion(-) > > diff --git a/cmds-check.c b/cmds-check.c > index 567ca80..088c0d8 100644 > --- a/cmds-check.c > +++ b/cmds-check.c > @@ -2457,6 +2457,45 @@ out: > } > > /* > + * Set inode's isize to correct value in @info Please make it more detailed why the new value is correct one. > + * > + * Returns <0 means on error > + * Returns 0 means successful repair > + */ > +static int repair_inode_isize_lowmem(struct btrfs_trans_handle *trans, > + struct btrfs_root *root, > + struct inode_item_fix_info *info) > +{ > + struct btrfs_inode_item *ei; 'ei' looks like a copy-paste from some code that uses extent item, if the variable name should be a mnemonic, so please use 'ii'. > + struct btrfs_key key; > + struct btrfs_path path; > + int ret; > + > + ASSERT(info); > + key.objectid = info->ino; > + key.type = BTRFS_INODE_ITEM_KEY; > + key.offset = 0; > + > + ret = btrfs_search_slot(trans, root, , , 0, 1); > + if (ret < 0) > + goto out; > + if (ret > 0) { > + ret = -ENOENT; > + goto out; > + } > + > + ei = btrfs_item_ptr(path.nodes[0], path.slots[0], > + struct btrfs_inode_item); > + btrfs_set_inode_size(path.nodes[0], ei, info->isize); > + btrfs_mark_buffer_dirty(path.nodes[0]); > + printf("reset isize for inode %llu root %llu\n", info->ino, > +root->root_key.objectid); > +out: > + btrfs_release_path(); > + return ret; > +} > + > +/* > * repair_inode_item - repair inode item errors > * > * Repair the inode item if error can be repaired. 
Any caller should compare > @@ -2484,7 +2523,7 @@ static int repair_inode_item(struct btrfs_root *root, > ret = 0; > goto out; > } > - if (!(err & NBYTES_ERROR)) { > + if (!(err & NBYTES_ERROR) && !(err & ISIZE_ERROR)) { > warning("root %llu INODE[%llu] have error(s) can't repair, > error : %d", > root->objectid, info->ino, err); > /* can't fix any errors, ret should be positive */ > @@ -2505,6 +2544,13 @@ static int repair_inode_item(struct btrfs_root *root, > else if (ret < 0) > goto out; > } > + if (err & ISIZE_ERROR) { > + ret = repair_inode_isize_lowmem(trans, root, info); > + if (ret == 0) > + err &= ~ISIZE_ERROR; > + else if (ret < 0) > + goto out; > + } > > if (err != info->err) { > info->err = err; > @@ -5039,6 +5085,7 @@ out: > > if (isize != size) { > err |= ISIZE_ERROR; > + info->isize = size; > error("root %llu DIR INODE [%llu] size(%llu) not equal > to %llu", > root->objectid, inode_id, isize, size); > } > -- > 2.11.0 > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Lsf-pc] [LSF/MM TOPIC] sharing pages between mappings
On Wed 11-01-17 15:13:19, Miklos Szeredi wrote: > On Wed, Jan 11, 2017 at 12:51 PM, Jan Karawrote: > > On Wed 11-01-17 11:29:28, Miklos Szeredi wrote: > >> I know there's work on this for xfs, but could this be done in generic mm > >> code? > >> > >> What are the obstacles? page->mapping and page->index are the obvious > >> ones. > > > > Yes, these two are the main that come to my mind. Also you'd need to > > somehow share the mapping->i_mmap tree so that unmap_mapping_range() works. > > > >> If that's too difficult is it maybe enough to share mappings between > >> files while they are completely identical and clone the mapping when > >> necessary? > > > > Well, but how would the page->mapping->host indirection work? Even if you > > have identical contents of the mappings, you still need to be aware there > > are several inodes behind them and you need to pick the right one > > somehow... > > When do we actually need page->mapping->host? The only place where > it's not available is page writeback. Then we can know that the > original page was already cow-ed and after being cowed, the page > belong only to a single inode. Yeah, in principle the information may exist, however propagating it to all appropriate place may be a mess. > What then happens if the newly written data is cloned before being > written back? We can either write back the page during the clone, so > that only clean pages are ever shared. Or we can let dirty pages be > shared between inodes. The former is what I'd suggest for sanity... I.e. share only read-only pages. > In that latter case the question is: do we > care about which inode we use for writing back the data? Is the inode > needed at all? I don't know enough about filesystem internals to see > clearly what happens in such a situation. > > >> All COW filesystems would benefit, as well as layered ones: lots of > >> fuse fs, and in some cases overlayfs too. > >> > >> Related: what can DAX do in the presence of cloned block? 
> > > > For DAX handling a block COW should be doable if that is what you are > > asking about. Handling of blocks that can be written to while they are > > shared will be rather difficult (you have problems with keeping dirty bits > > in the radix tree consistent if nothing else). > > What happens if you do: > > - clone_file_range(A, off1, B, off2, len); > > - mmap both A and B using DAX. > > The mapping will contain the same struct page for two different mappings, no? Not the same struct page, as DAX does not have pages with struct page. However the same pfn will be underlying off1 of A and off2 of B. And for reads this is just fine. Once you want to write, you have to make sure you COW before you start modifying the data or you'll get data corruption (we synchronize operations using the exceptional entries in mapping->page_tree in DAX and these are separate for A and B). Honza -- Jan Kara SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] btrfs-progs: cmds-check.c: supports inode nbytes fix in lowmem
Hi, I have some comments, see below. On Mon, Jan 09, 2017 at 01:38:07PM +0800, Su Yue wrote: > Added 'repair_inode_item' which dispatches functions such as > 'repair_inode__nbytes_lowmem' to correct errors and > 'struct inode_item_fix_info' to store correct values and errors. > > Signed-off-by: Su Yue> --- > cmds-check.c | 161 > +++ > 1 file changed, 152 insertions(+), 9 deletions(-) > > diff --git a/cmds-check.c b/cmds-check.c > index 1dba298..567ca80 100644 > --- a/cmds-check.c > +++ b/cmds-check.c > @@ -371,6 +371,17 @@ struct root_item_info { > }; > > /* > + * Use inode_item_fix_info as function check_inode_item's arg. > + */ > +struct inode_item_fix_info { > + u64 ino; > + u64 isize; > + u64 nbytes; > + > + int err; > +}; > + > +/* > * Error bit for low memory mode check. > * > * Currently no caller cares about it yet. Just internal use for error > @@ -1866,13 +1877,16 @@ struct node_refs { > static int update_nodes_refs(struct btrfs_root *root, u64 bytenr, >struct node_refs *nrefs, u64 level); > static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path, > - unsigned int ext_ref); > - > + unsigned int ext_ref, > + struct inode_item_fix_info *info); > +static int repair_inode_item(struct btrfs_root *root, > + struct inode_item_fix_info *info); > static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path > *path, > struct node_refs *nrefs, int *level, int ext_ref) > { > struct extent_buffer *cur = path->nodes[0]; > struct btrfs_key key; > + struct inode_item_fix_info info; > u64 cur_bytenr; > u32 nritems; > u64 first_ino = 0; > @@ -1881,6 +1895,7 @@ static int process_one_leaf_v2(struct btrfs_root *root, > struct btrfs_path *path, > int ret = 0; /* Final return value */ > int err = 0; /* Positive error bitmap */ > > + memset(, 0, sizeof(info)); > cur_bytenr = cur->start; > > /* skip to first inode item or the first inode number change */ > @@ -1900,8 +1915,26 @@ static int process_one_leaf_v2(struct btrfs_root > *root, struct 
btrfs_path *path, > path->slots[0] = i; > > again: > - err |= check_inode_item(root, path, ext_ref); > + err |= check_inode_item(root, path, ext_ref, ); > + > + if (repair && (err & ~LAST_ITEM)) { > + ret = repair_inode_item(root, ); > > + if (ret < 0) > + goto out; > + /* > + * if some errors was repaired, path shall be searched > + * again since path has been changed > + */ > + if (ret == 0) { > + btrfs_item_key_to_cpu(path->nodes[0], , > + path->slots[0]); > + btrfs_release_path(path); > + btrfs_search_slot(NULL, root, , path, 0, 0); > + > + cur = path->nodes[0]; > + } > + } > if (err & LAST_ITEM) > goto out; > > @@ -2211,7 +2244,8 @@ out: > } > > static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path, > - unsigned int ext_ref); > + unsigned int ext_ref, > + struct inode_item_fix_info *info); > > static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path > *path, >int *level, struct node_refs *nrefs, int ext_ref) > @@ -2293,7 +2327,7 @@ static int walk_down_tree_v2(struct btrfs_root *root, > struct btrfs_path *path, > } > > ret = check_child_node(root, cur, path->slots[*level], next); > - if (ret < 0) > + if (ret < 0) > break; > > if (btrfs_is_leaf(next)) > @@ -2383,6 +2417,105 @@ out: > return ret; > } > > +/* > + * Set inode's nbytes to correct value in @info > + * > + * Returns <0 means on error > + * Returns 0 means successful repair > + */ > +static int repair_inode_nbytes_lowmem(struct btrfs_trans_handle *trans, > + struct btrfs_root *root, > + struct inode_item_fix_info *info) > +{ > + struct btrfs_inode_item *ei; > + struct btrfs_key key; > + struct btrfs_path path; > + int ret; > + > + ASSERT(info); > + key.objectid = info->ino; > + key.type = BTRFS_INODE_ITEM_KEY; > + key.offset = 0; The path init call is missing here. > + > + ret = btrfs_search_slot(trans, root, , , 0, 1); > + if (ret < 0) > + goto out; > + if (ret > 0) { > + ret = -ENOENT; > + goto out; > + } > + > + ei = btrfs_item_ptr(path.nodes[0],
Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"
On Tue 17-01-17 09:24:25, Michal Hocko wrote: > On Mon 16-01-17 21:56:07, Theodore Ts'o wrote: > > On Fri, Jan 06, 2017 at 03:11:07PM +0100, Michal Hocko wrote: > > > From: Michal Hocko > > > > > > This reverts commit 216553c4b7f3e3e2beb4981cddca9b2027523928. Now that > > > the transaction context uses memalloc_nofs_save and all allocations > > > within this context inherit GFP_NOFS automatically, there is no > > > reason to mark specific allocations explicitly. > > > > > > This patch should not introduce any functional change. The main point > > > of this change is to reduce explicit GFP_NOFS usage inside ext4 code > > > to make the review of the remaining usage easier. > > > > > > Signed-off-by: Michal Hocko > > > Reviewed-by: Jan Kara > > > > Changes in the jbd2 layer aren't going to guarantee that > > memalloc_nofs_save() will be executed if we are running ext4 without a > > journal (aka in no journal mode). And this is a *very* common > > configuration; it's how ext4 is used inside Google in our production > > servers. > > OK, I wasn't aware of that. > > > So that means the earlier patches will probably need to be changed so > > the NOFS scope is done in the ext4_journal_{start,stop} functions in > > fs/ext4/ext4_jbd2.c. > > I would definitely appreciate some help here. The call paths are rather > complex and I am not familiar with the code enough. One of the biggest > problems I have currently is that there doesn't seem to be an easy place > to store the old allocation context. OK, so I've been staring into the code and AFAIU current->journal_info can contain my stored information. I could either hijack part of the word as the ref counting is only consuming low 12b. But that looks too ugly to live. Or I can allocate some placeholder. But before going to play with that I am really wondering whether we need all this with no journal at all. AFAIU what Jack told me it is the journal lock(s) which is the biggest problem from the reclaim recursion point of view. 
What would cause a deadlock in no journal mode? -- Michal Hocko SUSE Labs -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] btrfs-progs: fsck-tests: corrupt nlink value test
On Mon, Jan 16, 2017 at 09:35:52AM -0700, lakshmipath...@giis.co.in wrote: > If btrfs-corrupt-block is in bad shape, then corruption scripts around > them won't help in long term. > > Yes, documentation for btrfs-corrupt-block needs improvement. imo, > re-arranged priority will be like : (5), (1)/(3) then (4). Agree that > some corner cases, having static image is best option, i think the > corruption test-case needs to be mixture of both static-images and > scripts. Both approaches have their pros and cons so I'll accept both. The functionality provided by the corrupt block utility can be used, any changes to the command line UI will be also applied to the test scripts. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] btrfs-progs: fsck-tests: missing csum test script
It's not clear from the test what's the purpose. There's one corrupted csum but the whole csum tree rebuild option is used. This is a pretty basic check that the --init-csum-tree works, so it should be mentioned somewhere in the test script. On Thu, Jan 05, 2017 at 08:26:36PM +0100, Lakshmipathi.G wrote: > Signed-off-by: Lakshmipathi.G> --- > tests/fsck-tests/027-missing-data-csum/test.sh | 39 > ++ > 1 file changed, 39 insertions(+) > create mode 100755 tests/fsck-tests/027-missing-data-csum/test.sh > > diff --git a/tests/fsck-tests/027-missing-data-csum/test.sh > b/tests/fsck-tests/027-missing-data-csum/test.sh > new file mode 100755 > index 000..6d1dc97 > --- /dev/null > +++ b/tests/fsck-tests/027-missing-data-csum/test.sh > @@ -0,0 +1,39 @@ > +#!/bin/bash > + > +source $TOP/tests/common > + > +check_prereq btrfs-corrupt-block > +check_prereq mkfs.btrfs > +check_prereq btrfs > + > +setup_root_helper > +prepare_test_dev 512M > + > + > +# simulate missing csum error and repair using init-csum option > +test_csum_corruption() > +{ > + run_check $SUDO_HELPER $TOP/mkfs.btrfs -f $TEST_DEV > + > + run_check_mount_test_dev > + > + export DATASET_SIZE=1 > + generate_dataset small > + > + run_check_umount_test_dev > + > + # find bytenr > + bytenr=`$SUDO_HELPER $TOP/btrfs-debug-tree $TEST_DEV | \ Please don't use btrfs-debug-tree, it's been obsoleted by the dump-tree subcommand. And you can use the '-t csum' option to dump just the csum tree. > + grep "EXTENT_CSUM EXTENT_CSUM" | \ > + cut -f1 -d')' | awk '{print $6}'` This can be simplified to one sed command. > + > + # corrupt csum bytenr > + run_check $SUDO_HELPER $TOP/btrfs-corrupt-block -C $bytenr $TEST_DEV > + > + $SUDO_HELPER $TOP/btrfs check $TEST_DEV >& /dev/null && \ > + _fail "btrfs check failed to detect missing csum." Here we want to capture the output from 'check' for analysis in case of problems. 
> + run_check $SUDO_HELPER $TOP/btrfs check --repair --init-csum $TEST_DEV > + run_check $SUDO_HELPER $TOP/btrfs check $TEST_DEV > +} > + > +test_csum_corruption -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] btrfs-progs: Corruption-framework: Include inode nlink field
On Thu, Jan 05, 2017 at 11:08:32AM +0100, Lakshmipathi.G wrote: > Patch with fix for David Sterba review comment. > > Signed-off-by: Lakshmipathi.G Applied, thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Uncorrectable errors with RAID1
On 2017-01-17 04:18, Christoph Groth wrote: Austin S. Hemmelgarn wrote: There's not really much in the way of great documentation that I know of. I can however cover the basics here: (...) Thanks for this explanation. I'm sure it will be also useful to others. Glad I could help. If the chunk to be allocated was a data chunk, you get -ENOSPC (usually, sometimes you might get other odd results) in the userspace application that triggered the allocation. It seems that the available space reported by the system df command corresponds roughly to the size of the block device minus all the "used" space as reported by "btrfs fi df". That's correct. If I understand what you wrote correctly this means that when writing a huge file it may happen that the system df will report enough free space, but btrfs will raise ENOSPC. However, it should be possible to keep writing small files even at this point (assuming that there's enough space for the metadata). Or will btrfs split the huge file into small pieces to fit it into the fragmented free space in the chunks? OK, so the first bit to understanding this is that an extent in a file can't be larger than a chunk. This means that if you have space for 3 1GB data chunks located in 3 different places on the storage device, you can still write a 3GB file to the filesystem, it will just end up with 3 1GB extents. The issues with ENOSPC come in when almost all of your space is allocated to chunks and one type gets full. In such a situation, if you have metadata space, you can keep writing to the FS, but big writes may fail, and you'll eventually end up in a situation where you need to delete things to free up space. Such a situation should be avoided of course. I'm asking out of curiosity. * So scrubbing is not enough to check the health of a btrfs file system? It’s also necessary to read all the files? Scrubbing checks data integrity, but not the state of the data. 
IOW, you're checking that the data and metadata match with the checksums, but not necessarily that the filesystem itself is valid. I see, but what should one then do to detect problems such as mine as soon as possible? Periodically calculate hashes for all files? I’ve never seen a recommendation to do that for btrfs. Scrub will verify that the data is the same as when the kernel calculated the block checksum. That's really the best that can be done. In your case, it couldn't correct the errors because both copies of the corrupted blocks were bad (this points at an issue with either RAM or the storage controller BTW, not the disks themselves). Had one of the copies been valid, it would have intelligently detected which one was bad and fixed things. I think I understand the problem with the three corrupted blocks that I was able to fix by replacing the files. But there is also the strange "Stale file handle" error with some other files that was not found by scrubbing, and also does not seem to appear in the output of "btrfs dev stats", which is BTW [/dev/sda2].write_io_errs 0 [/dev/sda2].read_io_errs 0 [/dev/sda2].flush_io_errs 0 [/dev/sda2].corruption_errs 3 [/dev/sda2].generation_errs 0 [/dev/sdb2].write_io_errs 0 [/dev/sdb2].read_io_errs 0 [/dev/sdb2].flush_io_errs 0 [/dev/sdb2].corruption_errs 3 [/dev/sdb2].generation_errs 0 (The 2 times 3 corruption errors seem to be the uncorrectable errors that I could fix by replacing the files.) Yep, those correspond directly to the uncorrectable errors you mentioned in your original post. To get the "stale file handle" error I need to try to read the affected file. That's why I was wondering whether reading all the files periodically is indeed a useful maintenance procedure with btrfs. In the cases I've seen, no it isn't all that useful. As far as the whole ESTALE thing, that's almost certainly a bug and you either shouldn't be getting an error there, or you shouldn't be getting that error code there. 
"btrfs check" does find the problem, but it can be only run on an unmounted file system. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Uncorrectable errors with RAID1
On 2017-01-16 23:50, Janos Toth F. wrote: BTRFS uses a 2 level allocation system. At the higher level, you have chunks. These are just big blocks of space on the disk that get used for only one type of lower level allocation (Data, Metadata, or System). Data chunks are normally 1GB, Metadata 256MB, and System depends on the size of the FS when it was created. Within these chunks, BTRFS then allocates individual blocks just like any other filesystem. This always seems to confuse me when I try to get an abstract idea about de-/fragmentation of Btrfs. Can meta-/data be fragmented on both levels? And if so, can defrag and/or balance "cure" both levels of fragmentation (if any)? But how? May be several defrag and balance runs, repeated until returns diminish (or at least you consider them meaningless and/or unnecessary)? Defrag operates only at the block level. It won't allocate chunks unless it has to, and it won't remove chunks unless they become empty from it moving things around (although that's not likely to happen most of the time). Balance functionally operates at both levels, but it doesn't really do any defragmentation. Balance _may_ merge extents sometimes, but I'm not sure of this. It will compact allocations and therefore functionally defragment free space within chunks (though not necessarily at the chunk-level itself). Defrag run with the same options _should_ have no net effect after the first run, the two exceptions being if the filesystem is close to full or if the data set is being modified live while the defrag is happening. Balance run with the same options will eventually hit a point where it doesn't do anything (or only touches one chunk of each type but doesn't actually give any benefit). If you're just using the usage filters or doing a full balance, this point is the second run. If you're using other filters, it's functionally not possible to determine when that point will be without low-level knowledge of the chunk layout. 
For an idle filesystem, if you run defrag then a full balance, that will get you a near optimal layout. Running them in the reverse order will get you a different layout that may be less optimal than running defrag first because defrag may move data in such a way that new chunks get allocated. Repeated runs of defrag and balance will in more than 95% of cases provide no extra benefit. What balancing does is send everything back through the allocator, which in turn back-fills chunks that are only partially full, and removes ones that are now empty. Doesn't this have a potential chance of introducing (additional) extent-level fragmentation? In theory, yes. IIRC, extents can't cross a chunk boundary. Beyond that packing constraint, balance shouldn't fragment things further. FWIW, while there isn't a daemon yet that does this, it's a perfect thing for a cronjob. The general maintenance regimen that I use for most of my filesystems is: * Run 'btrfs balance start -dusage=20 -musage=20' daily. This will complete really fast on most filesystems, and keeps the slack-space relatively under-control (and has the nice bonus that it helps defragment free space). * Run a full scrub on all filesystems weekly. This catches silent corruption of the data, and will fix it if possible. * Run a full defrag on all filesystems monthly. This should be run before the balance (reasons are complicated and require more explanation than you probably care for). I would run this at least weekly though on HDDs, as they tend to be more negatively impacted by fragmentation. I wonder if one should always run a full balance instead of a full scrub, since balance should also read (and thus theoretically verify) the meta-/data (does it though? I would expect it to check the checksums, but who knows...? maybe it's "optimized" to skip that step?) and also perform the "consolidation" of the chunk level. Scrub uses fewer resources than balance. 
Balance has to read _and_ re-write all data in the FS regardless of the state of the data. Scrub only needs to read the data if it's good, and if it's bad it only (for raid1) has to re-write the replica that's bad, not both of them. In fact, the only practical reason to run balance on a regular basis at all is to compact allocations and defragment free space. This is why I only have it balance chunks that are less than 1/5 full. I wish there was some more "integrated" solution for this: a balance-like operation which consolidates the chunks and also de-fragments the file extents at the same time while passively uncovers (and fixes if necessary and possible) any checksum mismatches / data errors, so that balance and defrag can't work against each-other and the overall work is minimized (compared to several full runs or many different commands). More than 90% of the time, the performance difference between the absolute optimal layout and the one generated by just running defrag then balancing is
Re: Uncorrectable errors with RAID1
On 01/17/2017 02:44 AM, Christoph Groth wrote: > Goldwyn Rodrigues wrote: > >> Would you be able to upload a btrfs-image for me to examine? This is a >> core ctree error where most probably item size is incorrectly registered. > > Sure, I can do that. I'd like to use the -s option, will this be fine? Yes, I think that should be fine. > Is there some preferred place for the upload? If not, I can use > personal webspace. No, there is no preferred place. As long as I can download it, it is fine. -- Goldwyn signature.asc Description: OpenPGP digital signature
Re: corruption: yet another one after deleting a ro snapshot
Am 17. Januar 2017 09:53:19 MEZ schrieb Qu Wenruo: >Just lowmem false alert, as extent-tree dump shows a completely fine >result. > >I'll CC you and add your reported-by tag when there is any update on >this case. Fine, just one more thing from my side on this issue: Do you want me to leave the fs untouched until I could verify a lowmem mode fix? Or is it ok to go on using it (and running backups on it)? Cheers, Chris. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Uncorrectable errors with RAID1
Austin S. Hemmelgarn wrote: There's not really much in the way of great documentation that I know of. I can however cover the basics here: (...) Thanks for this explanation. I'm sure it will be also useful to others. If the chunk to be allocated was a data chunk, you get -ENOSPC (usually, sometimes you might get other odd results) in the userspace application that triggered the allocation. It seems that the available space reported by the system df command corresponds roughly to the size of the block device minus all the "used" space as reported by "btrfs fi df". If I understand what you wrote correctly this means that when writing a huge file it may happen that the system df will report enough free space, but btrfs will raise ENOSPC. However, it should be possible to keep writing small files even at this point (assuming that there's enough space for the metadata). Or will btrfs split the huge file into small pieces to fit it into the fragmented free space in the chunks? Such a situation should be avoided of course. I'm asking out of curiosity. * So scrubbing is not enough to check the health of a btrfs file system? It’s also necessary to read all the files? Scrubbing checks data integrity, but not the state of the data. IOW, you're checking that the data and metadata match with the checksums, but not necessarily that the filesystem itself is valid. I see, but what should one then do to detect problems such as mine as soon as possible? Periodically calculate hashes for all files? I’ve never seen a recommendation to do that for btrfs. Scrub will verify that the data is the same as when the kernel calculated the block checksum. That's really the best that can be done. In your case, it couldn't correct the errors because both copies of the corrupted blocks were bad (this points at an issue with either RAM or the storage controller BTW, not the disks themselves). Had one of the copies been valid, it would have intelligently detected which one was bad and fixed things. 
I think I understand the problem with the three corrupted blocks that I was able to fix by replacing the files. But there is also the strange "Stale file handle" error with some other files that was not found by scrubbing, and also does not seem to appear in the output of "btrfs dev stats", which is BTW [/dev/sda2].write_io_errs 0 [/dev/sda2].read_io_errs 0 [/dev/sda2].flush_io_errs 0 [/dev/sda2].corruption_errs 3 [/dev/sda2].generation_errs 0 [/dev/sdb2].write_io_errs 0 [/dev/sdb2].read_io_errs 0 [/dev/sdb2].flush_io_errs 0 [/dev/sdb2].corruption_errs 3 [/dev/sdb2].generation_errs 0 (The 2 times 3 corruption errors seem to be the uncorrectable errors that I could fix by replacing the files.) To get the "stale file handle" error I need to try to read the affected file. That's why I was wondering whether reading all the files periodically is indeed a useful maintenance procedure with btrfs. "btrfs check" does find the problem, but it can only be run on an unmounted file system. signature.asc Description: PGP signature
Re: corruption: yet another one after deleting a ro snapshot
At 01/17/2017 06:07 AM, Christoph Anton Mitterer wrote: On Mon, 2017-01-16 at 13:47 +0800, Qu Wenruo wrote: And I highly suspect if the subvolume 6403 is the RO snapshot you just removed. I guess there is no way to find out whether it was that snapshot, is there? "btrfs subvolume list" could do it." Well that was clear,... but I rather meant something that also shows me the path of the deleted subvol. Deleted subvol lost its ROOT_BACKREF, so there is no info where that subvolume used to be. Anyway: # btrfs subvolume list /data/data-a/3/ ID 6029 gen 2528 top level 5 path data ID 6031 gen 3208 top level 5 path backups ID 7285 gen 3409 top level 5 path snapshots/_external-fs/data-a1/data/2017-01-11_1 So since I only had two further snapshots in snapshots/_external- fs/data-a1/data/ it must be the deleted one. btw: data is empty, and backup contains actually some files (~25k, ~360GB)... these are not created via send/receive, but either via cp or rsync. And they are always in the same subvol (i.e. the backups svol isn't deleted like the snaphots are) Also checked the extent tree, the result is a little interesting: 1) Most tree backref are good. In fact, 3 of all the 4 errors reported are tree blocks shared by other subvolumes, like: item 77 key (5120 METADATA_ITEM 1) itemoff 13070 itemsize 42 extent refs 2 gen 11 flags TREE_BLOCK|FULL_BACKREF tree block skinny level 1 tree block backref root 7285 tree block backref root 6572 This means the tree blocks are share by 2 other subvolumes, 7285 and 6572. 7285 subvolume is completely OK, while 6572 is also undergoing subvolume deletion(while real deletion doesn't start yet). Well there were in total three snapshots, the still existing: snapshots/_external-fs/data-a1/data/2017-01-11_1 and two earlier ones, one from around 2016-09-16_1 (= probably ID 6572?), one even a bit earlier from 2016-08-19_1 (probably ID 6403?). Each one was created with send -p | receive, using the respectively earlier one as parent. 
So it's quite reasonable that they share the extents and also that it's by 2 subvols. And considering the generation, I assume 6403 is deleted before 6572. Don't remember which one of the 2 subvols from 2016 I've deleted first, the older or the more recent one... my bash history implies in this order: 4203 btrfs subvolume delete 2016-08-19_1 4204 btrfs subvolume delete 2016-09-16_1 So we're almost clear that, btrfs (maybe only btrfsck) doesn't handle it well if there are multiple subvolumes undergoing deletion. This gives us enough info to try to build such image by ourselves now. (Although still quite hard to do though). Well keep me informed if you actually find/fix something :) And for the scary lowmem mode, it's a false alert. I manually checked the used size of a block group and it's OK. So you're going to fix this? Yes, digging now. The lowmem mode bug should be much easier to fix, compared to the lost backref false alert. BTW, most of your block groups are completely used, without any space. But interestingly, mostly data extent size are just 512K, larger than compressed extent upper limit, but still quite small. Not sure if I understand this... In other words, your fs seems to be fragmented considering the upper limit of a data extent is 128M. (Or your case is quite common in common world?) No, I don't think I understand what you mean :D So you are mostly OK to mount it rw any time you want, and I have already downloaded the raw data. Okay, I've remounted it now RW, called btrfs filesystem sync, and waited until the HDD became silent and showed no further activity. (again 3.9) # btrfs check /dev/nbd0 ; echo $? 
Checking filesystem on /dev/nbd0 UUID: 326d292d-f97b-43ca-b1e8-c722d3474719 checking extents checking free space cache checking fs roots checking csums checking root refs found 7469206884352 bytes used err is 0 total csum bytes: 7281779252 total tree bytes: 10837262336 total fs tree bytes: 2011906048 total extent tree bytes: 1015349248 btree space waste bytes: 922444044 file data blocks allocated: 7458369622016 referenced 7579485159424 0 Nice to see it. => as you can see, original mode pretends things would be fine now. # btrfs check --mode=lowmem /dev/nbd0 ; echo $? Checking filesystem on /dev/nbd0 UUID: 326d292d-f97b-43ca-b1e8-c722d3474719 checking extents ERROR: block group[74117545984 1073741824] used 1073741824 but extent items used 0 ERROR: block group[239473786880 1073741824] used 1073741824 but extent items used 1207959552 ERROR: block group[500393050112 1073741824] used 1073741824 but extent items used 1207959552 ERROR: block group[581997428736 1073741824] used 1073741824 but extent items used 0 ERROR: block group[626557714432 1073741824] used 1073741824 but extent items used 0 ERROR: block group[668433645568 1073741824] used 1073741824 but extent items used 0 ERROR: block
Re: Uncorrectable errors with RAID1
Goldwyn Rodrigues wrote: Would you be able to upload a btrfs-image for me to examine. This is a core ctree error where most probably item size is incorrectly registered. Sure, I can do that. I'd like to use the -s option, will this be fine? Is there some preferred place for the upload? If not, I can use personal webspace. signature.asc Description: PGP signature
Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"
On Mon 16-01-17 21:56:07, Theodore Ts'o wrote: > On Fri, Jan 06, 2017 at 03:11:07PM +0100, Michal Hocko wrote: > > From: Michal Hocko > > > > This reverts commit 216553c4b7f3e3e2beb4981cddca9b2027523928. Now that > > the transaction context uses memalloc_nofs_save and all allocations > > within this context inherit GFP_NOFS automatically, there is no > > reason to mark specific allocations explicitly. > > > > This patch should not introduce any functional change. The main point > > of this change is to reduce explicit GFP_NOFS usage inside ext4 code > > to make the review of the remaining usage easier. > > > > Signed-off-by: Michal Hocko > > Reviewed-by: Jan Kara > > Changes in the jbd2 layer aren't going to guarantee that > memalloc_nofs_save() will be executed if we are running ext4 without a > journal (aka in no journal mode). And this is a *very* common > configuration; it's how ext4 is used inside Google in our production > servers. OK, I wasn't aware of that. > So that means the earlier patches will probably need to be changed so > the NOFS scope is done in the ext4_journal_{start,stop} functions in > fs/ext4/ext4_jbd2.c. I would definitely appreciate some help here. The call paths are rather complex and I am not familiar with the code enough. One of the biggest problems I have currently is that there doesn't seem to be an easy place to store the old allocation context. The original patch had it inside the journal handle. I was thinking about putting it into the superblock but ext4_journal_stop doesn't seem to have access to the sb if there is no handle. Now, if ext4_journal_start is never called from a nested context then this is not a big deal but there are just too many callers to check... -- Michal Hocko SUSE Labs -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html