[PATCH] Btrfs: refactor btrfs_extent_same() slightly

2017-01-17 Thread Omar Sandoval
From: Omar Sandoval 

This was originally a prep patch for changing the behavior on len=0, but
we went another direction with that. This still makes the function
slightly easier to follow.

Reviewed-by: Qu Wenruo 
Signed-off-by: Omar Sandoval 
---
Qu thought this would still be a worthwhile cleanup. I'm fine either
way. Applies to Dave's for-next branch.

 fs/btrfs/ioctl.c | 33 -
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bf7227d43b5d..3542e3d2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3127,26 +3127,27 @@ static int btrfs_extent_same(struct inode *src, u64 
loff, u64 olen,
int ret;
u64 len = olen;
struct cmp_pages cmp;
-   int same_inode = 0;
+   bool same_inode = (src == dst);
u64 same_lock_start = 0;
u64 same_lock_len = 0;
 
-   if (src == dst)
-   same_inode = 1;
-
if (len == 0)
return 0;
 
-   if (same_inode) {
+   if (same_inode)
inode_lock(src);
+   else
+   btrfs_double_inode_lock(src, dst);
 
-   ret = extent_same_check_offsets(src, loff, , olen);
-   if (ret)
-   goto out_unlock;
-   ret = extent_same_check_offsets(src, dst_loff, , olen);
-   if (ret)
-   goto out_unlock;
+   ret = extent_same_check_offsets(src, loff, , olen);
+   if (ret)
+   goto out_unlock;
 
+   ret = extent_same_check_offsets(dst, dst_loff, , olen);
+   if (ret)
+   goto out_unlock;
+
+   if (same_inode) {
/*
 * Single inode case wants the same checks, except we
 * don't want our length pushed out past i_size as
@@ -3174,16 +3175,6 @@ static int btrfs_extent_same(struct inode *src, u64 
loff, u64 olen,
 
same_lock_start = min_t(u64, loff, dst_loff);
same_lock_len = max_t(u64, loff, dst_loff) + len - 
same_lock_start;
-   } else {
-   btrfs_double_inode_lock(src, dst);
-
-   ret = extent_same_check_offsets(src, loff, , olen);
-   if (ret)
-   goto out_unlock;
-
-   ret = extent_same_check_offsets(dst, dst_loff, , olen);
-   if (ret)
-   goto out_unlock;
}
 
/* don't make the dst file partly checksummed */
-- 
2.11.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: constify struct btrfs_{,disk_}key wherever possible

2017-01-17 Thread Omar Sandoval
From: Omar Sandoval 

In a lot of places, it's unclear when it's safe to reuse a struct
btrfs_key after it has been passed to a helper function. Constify these
arguments wherever possible to make it obvious.

Signed-off-by: Omar Sandoval 
---
This applies to Dave's for-next branch. If it's too intrusive of a
change, it can wait, but I think it's a nice cleanup.

 fs/btrfs/ctree.c   | 58 +---
 fs/btrfs/ctree.h   | 60 ++
 fs/btrfs/extent-tree.c |  9 
 fs/btrfs/root-tree.c   |  6 ++---
 4 files changed, 69 insertions(+), 64 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 146b2dc0d2cf..72dd200f0478 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -28,9 +28,9 @@
 
 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
  *root, struct btrfs_path *path, int level);
-static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *ins_key,
- struct btrfs_path *path, int data_size, int extend);
+static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root 
*root,
+ const struct btrfs_key *ins_key, struct btrfs_path *path,
+ int data_size, int extend);
 static int push_node_left(struct btrfs_trans_handle *trans,
  struct btrfs_fs_info *fs_info,
  struct extent_buffer *dst,
@@ -1580,7 +1580,8 @@ static int close_blocks(u64 blocknr, u64 other, u32 
blocksize)
 /*
  * compare two keys in a memcmp fashion
  */
-static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
+static int comp_keys(const struct btrfs_disk_key *disk,
+const struct btrfs_key *k2)
 {
struct btrfs_key k1;
 
@@ -1592,7 +1593,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct 
btrfs_key *k2)
 /*
  * same as comp_keys only with two btrfs_key's
  */
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
 {
if (k1->objectid > k2->objectid)
return 1;
@@ -1732,8 +1733,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
  * slot may point to max if the key is bigger than all of the keys
  */
 static noinline int generic_bin_search(struct extent_buffer *eb,
-  unsigned long p,
-  int item_size, struct btrfs_key *key,
+  unsigned long p, int item_size,
+  const struct btrfs_key *key,
   int max, int *slot)
 {
int low = 0;
@@ -1802,7 +1803,7 @@ static noinline int generic_bin_search(struct 
extent_buffer *eb,
  * simple bin_search frontend that does the right thing for
  * leaves vs nodes
  */
-static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
  int level, int *slot)
 {
if (level == 0)
@@ -1819,7 +1820,7 @@ static int bin_search(struct extent_buffer *eb, struct 
btrfs_key *key,
  slot);
 }
 
-int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
 int level, int *slot)
 {
return bin_search(eb, key, level, slot);
@@ -2440,7 +2441,7 @@ static int
 read_block_for_search(struct btrfs_trans_handle *trans,
   struct btrfs_root *root, struct btrfs_path *p,
   struct extent_buffer **eb_ret, int level, int slot,
-  struct btrfs_key *key, u64 time_seq)
+  const struct btrfs_key *key, u64 time_seq)
 {
struct btrfs_fs_info *fs_info = root->fs_info;
u64 blocknr;
@@ -2587,7 +2588,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 }
 
 static void key_search_validate(struct extent_buffer *b,
-   struct btrfs_key *key,
+   const struct btrfs_key *key,
int level)
 {
 #ifdef CONFIG_BTRFS_ASSERT
@@ -2606,7 +2607,7 @@ static void key_search_validate(struct extent_buffer *b,
 #endif
 }
 
-static int key_search(struct extent_buffer *b, struct btrfs_key *key,
+static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
  int level, int *prev_cmp, int *slot)
 {
if (*prev_cmp != 0) {
@@ -2668,9 +2669,9 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct 
btrfs_path *path,
  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
  * possible)
  */
-int btrfs_search_slot(struct btrfs_trans_handle 

gdb log of crashed "btrfs-image -s"

2017-01-17 Thread Christoph Groth

Christoph Groth wrote:

Chris Murphy wrote:

On Tue, Jan 17, 2017 at 1:25 PM, Christoph Groth
 wrote:
Any ideas on what could be done?  If you need help to debug 
the problem with
btrfs-image, please tell me what I should do.  I can keep the 
broken file
system around until an image can be created at some later 
time.


Try 4.9, or even 4.8.5, tons of bugs have been fixed since 
4.7.3
although I don't know off hand if this particular bug is 
fixed. I did
recently do a btrfs-image with btrfs-progs v4.9 with -s and did 
not

get a segfault.


I compiled btrfs-image.static from btrfs-tools 4.9 (from git) 
and started it from Debian testing's initramfs.  The exact 
command that I use is:


/mnt/btrfs-image.static -c3 -s /dev/sda2 /mnt/mim-s.bim

It runs for a couple of seconds (enough to write 20263936 bytes 
of output) and then quits with


*** Error in `/mnt/btrfs-image.static`: double free or 
corruption  (!prev): 0x009f0940 ***

== Backtrace: ==
[0x45fb97]
[0x465442]
[0x465c1e]
[0x402694]
[0x402dcb]
[0x4031fe]
[0x4050ff]
[0x405783]
[0x44cb73]
[0x44cdfe]
[0x400b2a]

(I had to type the above off the other screen, but I double 
checked that there are no errors.)


The executable that I used can be downloaded from 
http://groth.fr/btrfs-image.static

Its md5sum is 48abbc82ac6d3c0cb88cba1e5edb85fd.

I hope that this can help someone to see what's going on.


I ran the same executable under gdb from a live system.  The log 
is attached.




btrfs-image.log
Description: Binary data


signature.asc
Description: PGP signature


Re: [PATCH] xfstests: btrfs/047: check btrfs-convert with extent and non-extent source

2017-01-17 Thread Eryu Guan
On Wed, Jan 18, 2017 at 07:17:02AM +0530, Lakshmipathi.G wrote:
> Signed-off-by: Lakshmipathi.G 

Need detailed test description in commit log too.

> ---
>  tests/btrfs/047 | 108 
> 
>  tests/btrfs/047.out |   1 +
>  tests/btrfs/group   |   1 +
>  3 files changed, 110 insertions(+)
>  create mode 100755 tests/btrfs/047
>  create mode 100644 tests/btrfs/047.out
> 
> diff --git a/tests/btrfs/047 b/tests/btrfs/047
> new file mode 100755
> index 000..0c4b2c7
> --- /dev/null
> +++ b/tests/btrfs/047
> @@ -0,0 +1,108 @@
> +#! /bin/bash
> +# FS QA Test 047
> +#
> +# Test btrfs-convert
> +# 

Trailing whitespace in above line.

> +# 1) create ext3 filesystem & populate it.
> +# 2) update ext3 filesystem to ext4.
> +# 3) populate data.
> +# 4) source has combination of non-extent and extent files.
> +# 5) convert it btrfs, mount and verify contents.
> +#---
> +# Copyright (c) 2017 Lakshmipathi.G  All Rights Reserved.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#---
> +#
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1 # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> + cd /
> + rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +
> +# Modify as appropriate.
> +_supported_fs btrfs
> +_supported_os Linux
> +_require_scratch_nocheck
> +
> +BTRFS_CONVERT_PROG="`set_prog_path btrfs-convert`"
> +E2FSCK_PROG="`set_prog_path e2fsck`"
> +TUNE2FS_PROG="`set_prog_path tune2fs`"

These should go to common/config. Can you please update btrfs/012 as
well to move such defines to common/config?

> +
> +_require_command "$BTRFS_CONVERT_PROG" btrfs-convert
> +_require_command "$MKFS_EXT4_PROG" mkfs.ext4
> +_require_command "$E2FSCK_PROG" e2fsck
> +_require_command "$TUNE2FS_PROG" tune2fs
> +
> +rm -f $seqres.full
> +
> +BLOCK_SIZE=`_get_block_size $TEST_DIR`
> +
> +# Create & populate an ext3 filesystem
> +$MKFS_EXT4_PROG -t ext3 -b $BLOCK_SIZE $SCRATCH_DEV > $seqres.full 2>&1 || \
> + _notrun "Could not create ext3 filesystem"

Better to add "-F" option to mkfs to force mkfs so it won't stop when
there's an existing fs on SCRATCH_DEV.

> +
> +# mount and populate non-extent file
> +mount -t ext3 $SCRATCH_DEV $SCRATCH_MNT
> +dd if=/dev/urandom of=$SCRATCH_MNT/f1.txt bs=1MB count=10 >> $seqres.full 
> 2>&1
> +NON_EXTENT_MD5=`md5sum $SCRATCH_MNT/f1.txt  | awk '{print $1}' `

Better to have different files with different file sizes and different
types, e.g. run fsstress to create such a fs structure.

> +_scratch_unmount
> +
> +# Upgrade it to ext4.
> +$TUNE2FS_PROG -O extents,uninit_bg,dir_index $SCRATCH_DEV >> $seqres.full 
> 2>&1
> +$E2FSCK_PROG -fyD $SCRATCH_DEV >> $seqres.full 2>&1

Why is this e2fsck needed? Add some comments? Or it just can be removed?

> +
> +# mount and populate extent file
> +mount -t ext4 $SCRATCH_DEV $SCRATCH_MNT
> +dd if=/dev/urandom of=$SCRATCH_MNT/f2.txt bs=1MB count=10 >> $seqres.full 
> 2>&1
> +EXTENT_MD5=`md5sum $SCRATCH_MNT/f2.txt  | awk '{print $1}'`
> +_scratch_unmount
> +
> +# Convert non-extent & extent data to btrfs, mount it, verify the data
> +$BTRFS_CONVERT_PROG $SCRATCH_DEV >> $seqres.full 2>&1 || \
> + _fail "btrfs-convert failed"
> +_scratch_mount || _fail "Could not mount new btrfs fs"
> +
> +F1_MD5=`md5sum $SCRATCH_MNT/f1.txt  | awk '{print $1}'`
> +F2_MD5=`md5sum $SCRATCH_MNT/f2.txt  | awk '{print $1}'`
> +if [ $NON_EXTENT_MD5 != $F1_MD5 ] ; then 

Trailing whitespace in above line.

> +_fail "ext3 file mismatch."

No need to _fail, just echo this message to break golden image.

And need indentation inside "if-then-fi"

> +fi
> +
> +if [ $EXTENT_MD5 != $F2_MD5 ] ; then 

Trailing whitespace.

> +_fail "ext4 file mismatch."

Same here. Use echo and indentation.

> +fi
> +   

Trailing whitespace.

> +# success, all done
> +status=0
> +exit
> diff --git a/tests/btrfs/047.out b/tests/btrfs/047.out
> new file mode 100644
> index 000..58e2353
> 

[PATCH] btrfs-progs: lowmem-check: Fix false alert on dropped leaf

2017-01-17 Thread Qu Wenruo
For btrfs-progs test case 021-partially-dropped-snapshot-case, if the
first leaf is already dropped, btrfs check low-memory mode will report
a false alert:

checking fs roots
checksum verify failed on 29917184 found E4E3BDB6 wanted 
checksum verify failed on 29917184 found E4E3BDB6 wanted 
checksum verify failed on 29917184 found E4E3BDB6 wanted 
checksum verify failed on 29917184 found E4E3BDB6 wanted 

This is caused by the call to check_fs_first_inode(), which, unlike the
rest of check_fs_root_v2(), does not check the dropping progress
sufficiently and so triggers the false alert.

Fix it by checking dropping progress before searching slot.

Signed-off-by: Qu Wenruo 
---
 cmds-check.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/cmds-check.c b/cmds-check.c
index 1dba2985..25247fd9 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4939,11 +4939,18 @@ static int check_fs_first_inode(struct btrfs_root 
*root, unsigned int ext_ref)
int err = 0;
int ret;
 
-   btrfs_init_path();
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
 
+   /* For root being dropped, we don't need to check first inode */
+   if (btrfs_root_refs(>root_item) == 0 &&
+   btrfs_disk_key_objectid(>root_item.drop_progress) >=
+   key.objectid)
+   return 0;
+
+   btrfs_init_path();
+
ret = btrfs_search_slot(NULL, root, , , 0, 0);
if (ret < 0)
goto out;
-- 
2.11.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] btrfs-progs: Fix disable backtrace assert error

2017-01-17 Thread Qu Wenruo
Due to commit 00e769d04c2c83029d6c71(btrfs-progs: Correct value printed
by assertions/BUG_ON/WARN_ON), which changed the assert_trace()
parameter, the conditions passed to assert/WARN_ON/BUG_ON are logically
negated for the backtrace enabled and disabled cases.

Such behavior makes it easy to pass the wrong value, and in fact it did
cause us to pass the wrong condition for ASSERT().

Instead of passing different conditions for ASSERT/WARN_ON/BUG_ON()
manually, this patch will use ASSERT() to implement the remaining
ASSERT/WARN_ON/BUG(), so we don't need to pass 3 different conditions
but only one.

Also, move WARN_ON() out of the ifdef branch, as it's completely the
same for both branches.

Cc: Goldwyn Rodrigues 
Signed-off-by: Qu Wenruo 
---
Sorry for the late update; I have been digging into the dev-replace/scrub bug.

v2:
  Keep ASSERT() outputting meaningful error string, use ASSERT() to
  implement BUG_ON() so only the abused BUG_ON() output is affected.
  Suggested by David.
v3:
  Update commit message, since we use ASSERT() instead of BUG_ON() as
  main assert function now.
---
 kerncompat.h | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/kerncompat.h b/kerncompat.h
index 19ed3fc0..fe23774e 100644
--- a/kerncompat.h
+++ b/kerncompat.h
@@ -291,18 +291,15 @@ static inline void assert_trace(const char *assertion, 
const char *filename,
abort();
exit(1);
 }
-
-#define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
-#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
 #defineASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__, 
(long)!(c))
-#define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 1)
 #else
-#define BUG_ON(c) assert(!(c))
-#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
-#define ASSERT(c) assert(!(c))
-#define BUG() assert(0)
+#define ASSERT(c) assert(c)
 #endif
 
+#define BUG_ON(c) ASSERT(!(c))
+#define BUG() BUG_ON(1)
+#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
+
 #define container_of(ptr, type, member) ({  \
 const typeof( ((type *)0)->member ) *__mptr = (ptr);\
(type *)( (char *)__mptr - offsetof(type,member) );})
-- 
2.11.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] btrfs-progs: Fix disable backtrace assert error

2017-01-17 Thread Qu Wenruo
Due to commit 00e769d04c2c83029d6c71(btrfs-progs: Correct value printed
by assertions/BUG_ON/WARN_ON), which changed the assert_trace()
parameter, the conditions passed to assert/WARN_ON/BUG_ON are logically
negated for the backtrace enabled and disabled cases.

Such behavior makes it easy to pass the wrong value, and in fact it did
cause us to pass the wrong condition for ASSERT().

Instead of passing different conditions for ASSERT/WARN_ON/BUG_ON()
manually, this patch will use BUG_ON() to implement the remaining
ASSERT/WARN_ON/BUG(), so we don't need to pass 3 different conditions
but only one.

And to further inform reviewers that the condition should be
different, rename "assert_trace" to "bugon_trace", since, unlike assert, we
will only trigger the bug when the condition is true.

Also, move WARN_ON() out of the ifdef branch, as it's completely the
same for both branches.

Cc: Goldwyn Rodrigues 
Signed-off-by: Qu Wenruo 
---
Sorry for the late update; I have been digging into the dev-replace/scrub bug.

v2:
  Keep ASSERT() outputting meaningful error string, use ASSERT() to
  implement BUG_ON() so only the abused BUG_ON() output is affected.
  Suggested by David.
---
 kerncompat.h | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/kerncompat.h b/kerncompat.h
index 19ed3fc0..fe23774e 100644
--- a/kerncompat.h
+++ b/kerncompat.h
@@ -291,18 +291,15 @@ static inline void assert_trace(const char *assertion, 
const char *filename,
abort();
exit(1);
 }
-
-#define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
-#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
 #defineASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__, 
(long)!(c))
-#define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 1)
 #else
-#define BUG_ON(c) assert(!(c))
-#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
-#define ASSERT(c) assert(!(c))
-#define BUG() assert(0)
+#define ASSERT(c) assert(c)
 #endif
 
+#define BUG_ON(c) ASSERT(!(c))
+#define BUG() BUG_ON(1)
+#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
+
 #define container_of(ptr, type, member) ({  \
 const typeof( ((type *)0)->member ) *__mptr = (ptr);\
(type *)( (char *)__mptr - offsetof(type,member) );})
-- 
2.11.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] btrfs-progs: Fix disable backtrace assert error

2017-01-17 Thread Qu Wenruo
Due to commit 00e769d04c2c83029d6c71(btrfs-progs: Correct value printed
by assertions/BUG_ON/WARN_ON), which changed the assert_trace()
parameter, the conditions passed to assert/WARN_ON/BUG_ON are logically
negated for the backtrace enabled and disabled cases.

Such behavior makes it easy to pass the wrong value, and in fact it did
cause us to pass the wrong condition for ASSERT().

Instead of passing different conditions for ASSERT/WARN_ON/BUG_ON()
manually, this patch will use BUG_ON() to implement the remaining
ASSERT/WARN_ON/BUG(), so we don't need to pass 3 different conditions
but only one.

And to further inform reviewers that the condition should be
different, rename "assert_trace" to "bugon_trace", since, unlike assert, we
will only trigger the bug when the condition is true.

Also, move WARN_ON() out of the ifdef branch, as it's completely the
same for both branches.

Cc: Goldwyn Rodrigues 
Signed-off-by: Qu Wenruo 
---
Sorry for the late update; I have been digging into the dev-replace/scrub bug.

v2:
  Keep ASSERT() outputting meaningful error string, use ASSERT() to
  implement BUG_ON() so only the abused BUG_ON() output is affected.
  Suggested by David.
---
 kerncompat.h | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/kerncompat.h b/kerncompat.h
index 19ed3fc0..fe23774e 100644
--- a/kerncompat.h
+++ b/kerncompat.h
@@ -291,18 +291,15 @@ static inline void assert_trace(const char *assertion, 
const char *filename,
abort();
exit(1);
 }
-
-#define BUG_ON(c) assert_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
-#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
 #defineASSERT(c) assert_trace(#c, __FILE__, __func__, __LINE__, 
(long)!(c))
-#define BUG() assert_trace(NULL, __FILE__, __func__, __LINE__, 1)
 #else
-#define BUG_ON(c) assert(!(c))
-#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
-#define ASSERT(c) assert(!(c))
-#define BUG() assert(0)
+#define ASSERT(c) assert(c)
 #endif
 
+#define BUG_ON(c) ASSERT(!(c))
+#define BUG() BUG_ON(1)
+#define WARN_ON(c) warning_trace(#c, __FILE__, __func__, __LINE__, (long)(c))
+
 #define container_of(ptr, type, member) ({  \
 const typeof( ((type *)0)->member ) *__mptr = (ptr);\
(type *)( (char *)__mptr - offsetof(type,member) );})
-- 
2.11.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] xfstests: btrfs/047: check btrfs-convert with extent and non-extent source

2017-01-17 Thread Lakshmipathi.G
Signed-off-by: Lakshmipathi.G 
---
 tests/btrfs/047 | 108 
 tests/btrfs/047.out |   1 +
 tests/btrfs/group   |   1 +
 3 files changed, 110 insertions(+)
 create mode 100755 tests/btrfs/047
 create mode 100644 tests/btrfs/047.out

diff --git a/tests/btrfs/047 b/tests/btrfs/047
new file mode 100755
index 000..0c4b2c7
--- /dev/null
+++ b/tests/btrfs/047
@@ -0,0 +1,108 @@
+#! /bin/bash
+# FS QA Test 047
+#
+# Test btrfs-convert
+# 
+# 1) create ext3 filesystem & populate it.
+# 2) update ext3 filesystem to ext4.
+# 3) populate data.
+# 4) source has combination of non-extent and extent files.
+# 5) convert it btrfs, mount and verify contents.
+#---
+# Copyright (c) 2017 Lakshmipathi.G  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+   cd /
+   rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+
+# Modify as appropriate.
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch_nocheck
+
+BTRFS_CONVERT_PROG="`set_prog_path btrfs-convert`"
+E2FSCK_PROG="`set_prog_path e2fsck`"
+TUNE2FS_PROG="`set_prog_path tune2fs`"
+
+_require_command "$BTRFS_CONVERT_PROG" btrfs-convert
+_require_command "$MKFS_EXT4_PROG" mkfs.ext4
+_require_command "$E2FSCK_PROG" e2fsck
+_require_command "$TUNE2FS_PROG" tune2fs
+
+rm -f $seqres.full
+
+BLOCK_SIZE=`_get_block_size $TEST_DIR`
+
+# Create & populate an ext3 filesystem
+$MKFS_EXT4_PROG -t ext3 -b $BLOCK_SIZE $SCRATCH_DEV > $seqres.full 2>&1 || \
+   _notrun "Could not create ext3 filesystem"
+
+# mount and populate non-extent file
+mount -t ext3 $SCRATCH_DEV $SCRATCH_MNT
+dd if=/dev/urandom of=$SCRATCH_MNT/f1.txt bs=1MB count=10 >> $seqres.full 2>&1
+NON_EXTENT_MD5=`md5sum $SCRATCH_MNT/f1.txt  | awk '{print $1}' `
+_scratch_unmount
+
+# Upgrade it to ext4.
+$TUNE2FS_PROG -O extents,uninit_bg,dir_index $SCRATCH_DEV >> $seqres.full 2>&1
+$E2FSCK_PROG -fyD $SCRATCH_DEV >> $seqres.full 2>&1
+
+# mount and populate extent file
+mount -t ext4 $SCRATCH_DEV $SCRATCH_MNT
+dd if=/dev/urandom of=$SCRATCH_MNT/f2.txt bs=1MB count=10 >> $seqres.full 2>&1
+EXTENT_MD5=`md5sum $SCRATCH_MNT/f2.txt  | awk '{print $1}'`
+_scratch_unmount
+
+# Convert non-extent & extent data to btrfs, mount it, verify the data
+$BTRFS_CONVERT_PROG $SCRATCH_DEV >> $seqres.full 2>&1 || \
+   _fail "btrfs-convert failed"
+_scratch_mount || _fail "Could not mount new btrfs fs"
+
+F1_MD5=`md5sum $SCRATCH_MNT/f1.txt  | awk '{print $1}'`
+F2_MD5=`md5sum $SCRATCH_MNT/f2.txt  | awk '{print $1}'`
+if [ $NON_EXTENT_MD5 != $F1_MD5 ] ; then 
+_fail "ext3 file mismatch."
+fi
+
+if [ $EXTENT_MD5 != $F2_MD5 ] ; then 
+_fail "ext4 file mismatch."
+fi
+   
+# success, all done
+status=0
+exit
diff --git a/tests/btrfs/047.out b/tests/btrfs/047.out
new file mode 100644
index 000..58e2353
--- /dev/null
+++ b/tests/btrfs/047.out
@@ -0,0 +1 @@
+QA output created by 047
diff --git a/tests/btrfs/group b/tests/btrfs/group
index 3fbf706..224a082 100644
--- a/tests/btrfs/group
+++ b/tests/btrfs/group
@@ -49,6 +49,7 @@
 044 auto quick send
 045 auto quick send
 046 auto quick send
+047 auto convert
 048 auto quick
 049 auto quick
 050 auto quick send
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] btrfs-progs: quota: fix printing during wait mode

2017-01-17 Thread jeffm
From: Jeff Mahoney 

If we call "btrfs quota rescan -w", it will attempt to start the rescan
operation, wait for it, and then print the "quota rescan started" message.
The wait could last an arbitrary amount of time, so printing it after
the wait isn't very helpful.

This patch reworks how we print the rescan started message as well as the
printing of the messages, including adding an error message for status
query failures (which could be EPERM/EFAULT/ENOMEM, not just no rescan
in progress) and wait failures.

Signed-off-by: Jeff Mahoney 
---
 cmds-quota.c | 40 +++-
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/cmds-quota.c b/cmds-quota.c
index 75c032b..f9b422d 100644
--- a/cmds-quota.c
+++ b/cmds-quota.c
@@ -154,28 +154,42 @@ static int cmd_quota_rescan(int argc, char **argv)
ret = ioctl(fd, ioctlnum, );
e = errno;
 
-   if (wait_for_completion && (ret == 0 || e == EINPROGRESS)) {
-   ret = ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT, );
-   e = errno;
-   }
-   close_file_or_dir(fd, dirstream);
-
-   if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN) {
+   if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN_STATUS) {
+   close_file_or_dir(fd, dirstream);
if (ret < 0) {
-   error("quota rescan failed: %s", strerror(e));
+   error("could not obtain quota rescan status: %s",
+ strerror(e));
return 1;
-   }  else {
-   printf("quota rescan started\n");
}
-   } else {
-   if (!args.flags) {
+   if (!args.flags)
printf("no rescan operation in progress\n");
-   } else {
+   else
printf("rescan operation running (current key %lld)\n",
args.progress);
+   return 0;
+   }
+
+   if (ret == 0) {
+   printf("quota rescan started\n");
+   fflush(stdout);
+   } else if (ret < 0 && (!wait_for_completion || e != EINPROGRESS)) {
+   error("quota rescan failed: %s", strerror(e));
+   close_file_or_dir(fd, dirstream);
+   return 1;
+   }
+
+   if (wait_for_completion) {
+   ret = ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT, );
+   e = errno;
+   if (ret < 0) {
+   error("quota rescan wait failed: %s",
+ strerror(e));
+   close_file_or_dir(fd, dirstream);
+   return 1;
}
}
 
+   close_file_or_dir(fd, dirstream);
return 0;
 }
 
-- 
2.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] btrfs-progs: quota: Add -W option to rescan to wait without starting rescan

2017-01-17 Thread jeffm
From: Jeff Mahoney 

This patch adds a new -W option to wait for a rescan without starting a
new operation.  This is useful for things like xfstests where we want
to do a "btrfs quota enable" and not continue until the subsequent
rescan has finished.

In addition to documenting the new option in the man page, I've cleaned
up the rescan entry to document the -w option a bit better.

Signed-off-by: Jeff Mahoney 
---
 Documentation/btrfs-quota.asciidoc | 10 +++---
 cmds-quota.c   | 21 +++--
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/Documentation/btrfs-quota.asciidoc 
b/Documentation/btrfs-quota.asciidoc
index 33c3bfd..7b29a97 100644
--- a/Documentation/btrfs-quota.asciidoc
+++ b/Documentation/btrfs-quota.asciidoc
@@ -222,15 +222,19 @@ Disable subvolume quota support for a filesystem.
 *enable* ::
 Enable subvolume quota support for a filesystem.
 
-*rescan* [-s] ::
+*rescan* [-s|-w|-W] ::
 Trash all qgroup numbers and scan the metadata again with the current config.
 +
 `Options`
 +
 -s
-show status of a running rescan operation.
+Show status of a running rescan operation.
+
 -w
-wait for rescan operation to finish(can be already in progress).
+Start rescan operation and wait until it has finished before exiting.  If a 
rescan is already running, wait until it finishes and then exit without 
starting a new one.
+
+-W
+Wait for rescan operation to finish and then exit.  If a rescan is not already 
running, exit silently.
 
 EXIT STATUS
 ---
diff --git a/cmds-quota.c b/cmds-quota.c
index f9b422d..a6df839 100644
--- a/cmds-quota.c
+++ b/cmds-quota.c
@@ -121,14 +121,20 @@ static int cmd_quota_rescan(int argc, char **argv)
int wait_for_completion = 0;
 
while (1) {
-   int c = getopt(argc, argv, "sw");
+   int c = getopt(argc, argv, "swW");
if (c < 0)
break;
switch (c) {
case 's':
ioctlnum = BTRFS_IOC_QUOTA_RESCAN_STATUS;
break;
+   case 'W':
+   ioctlnum = 0;
+   wait_for_completion = 1;
+   break;
case 'w':
+   /* Reset it in case the user did both -W and -w */
+   ioctlnum = BTRFS_IOC_QUOTA_RESCAN;
wait_for_completion = 1;
break;
default:
@@ -136,8 +142,9 @@ static int cmd_quota_rescan(int argc, char **argv)
}
}
 
-   if (ioctlnum != BTRFS_IOC_QUOTA_RESCAN && wait_for_completion) {
-   error("switch -w cannot be used with -s");
+   if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN_STATUS && wait_for_completion) {
+   error("switch -%c cannot be used with -s",
+ ioctlnum ? 'w' : 'W');
return 1;
}
 
@@ -151,8 +158,10 @@ static int cmd_quota_rescan(int argc, char **argv)
if (fd < 0)
return 1;
 
-   ret = ioctl(fd, ioctlnum, );
-   e = errno;
+   if (ioctlnum) {
+   ret = ioctl(fd, ioctlnum, );
+   e = errno;
+   }
 
if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN_STATUS) {
close_file_or_dir(fd, dirstream);
@@ -169,7 +178,7 @@ static int cmd_quota_rescan(int argc, char **argv)
return 0;
}
 
-   if (ret == 0) {
+   if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN && ret == 0) {
printf("quota rescan started\n");
fflush(stdout);
} else if (ret < 0 && (!wait_for_completion || e != EINPROGRESS)) {
-- 
2.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: corruption: yet another one after deleting a ro snapshot

2017-01-17 Thread Christoph Anton Mitterer
On Wed, 2017-01-18 at 08:41 +0800, Qu Wenruo wrote:
> Since we have your extent tree and root tree dump, I think we should
> be 
> able to build an image to reproduce the case.
+1

> BTW, your fs is too large for us to really do some verification or
> other 
> work.

Sure I know... but that's simply the one which I work the most with and
where I stumble over such things.

I have e.g. a smaller one (well still 1TB in total 500GB used) which is
the root-fs from my notebook... but not really any issues with that so
far ^^


Cheers,
Chris.

smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH 2/2] btrfs: replace: Use ref counts to avoid destroying target device when canceled

2017-01-17 Thread Qu Wenruo



At 01/18/2017 06:47 AM, Josef Bacik wrote:

On Mon, Jan 16, 2017 at 5:10 PM, Qu Wenruo  wrote:

When dev-replace and scrub are run at the same time, dev-replace can be
canceled by scrub. It's quite common for btrfs/069.

In that case, the target device can be destroyed at cancel time,
leading to a use-after-free bug:

 Process A (dev-replace) | Process B(scrub)
--
 |(Any RW is OK)
 |scrub_setup_recheck_block()
 ||- btrfs_map_sblock()
 |   Got a bbio with tgtdev
btrfs_dev_replace_finishing()|
|- btrfs_destroy_dev_replace_tgtdev()|
   |- call_rcu(free_device)  |
  |- __free_device() |
 |- kfree(device)|
 | Scrub worker:
 | Access bbio->stripes[], which
 | contains tgtdev.
 | This triggers general protection.

The bug is most obvious for RAID5/6, since raid56 chooses to keep the old
rbio and rbio->bbio for later stealing; this hugely enlarges the race
window and makes it much easier to trigger the bug.

This patch introduces 'tgtdev_refs' and 'tgtdev_wait' for btrfs_device
to wait until all its users have released the target device.

Signed-off-by: Qu Wenruo 
---
 fs/btrfs/dev-replace.c |  7 ++-
 fs/btrfs/volumes.c | 36 +++-
 fs/btrfs/volumes.h | 10 ++
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5de280b9ad73..794a6a0bedf2 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -558,7 +558,6 @@ static int btrfs_dev_replace_finishing(struct
btrfs_fs_info *fs_info,
   rcu_str_deref(src_device->name),
   src_device->devid,
   rcu_str_deref(tgt_device->name));
-tgt_device->is_tgtdev_for_dev_replace = 0;
 tgt_device->devid = src_device->devid;
 src_device->devid = BTRFS_DEV_REPLACE_DEVID;
 memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp));
@@ -579,6 +578,12 @@ static int btrfs_dev_replace_finishing(struct
btrfs_fs_info *fs_info,

 btrfs_dev_replace_unlock(dev_replace, 1);

+/*
+ * Only change is_tgtdev_for_dev_replace flag after all its
+ * users get released.
+ */
+wait_target_device(tgt_device);
+tgt_device->is_tgtdev_for_dev_replace = 0;
 btrfs_rm_dev_replace_blocked(fs_info);

 btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bb8592e1a364..74a6ee981b78 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2064,6 +2064,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct
btrfs_fs_info *fs_info,
 WARN_ON(!tgtdev);
 mutex_lock(_info->fs_devices->device_list_mutex);

+wait_target_device(tgtdev);
 btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);

 if (tgtdev->bdev)
@@ -2598,6 +2599,8 @@ int btrfs_init_dev_replace_tgtdev(struct
btrfs_fs_info *fs_info,
 device->is_tgtdev_for_dev_replace = 1;
 device->mode = FMODE_EXCL;
 device->dev_stats_valid = 1;
+atomic_set(>tgtdev_refs, 0);
+init_waitqueue_head(>tgtdev_wait);
 set_blocksize(device->bdev, 4096);
 device->fs_devices = fs_info->fs_devices;
 list_add(>dev_list, _info->fs_devices->devices);
@@ -2624,6 +2627,8 @@ void
btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
 tgtdev->sector_size = sectorsize;
 tgtdev->fs_info = fs_info;
 tgtdev->in_fs_metadata = 1;
+atomic_set(>tgtdev_refs, 0);
+init_waitqueue_head(>tgtdev_wait);
 }

 static noinline int btrfs_update_device(struct btrfs_trans_handle
*trans,
@@ -5302,6 +5307,32 @@ static struct btrfs_bio *alloc_btrfs_bio(int
total_stripes, int real_stripes)
 return bbio;
 }

+static void pin_bbio_target_device(struct btrfs_bio *bbio)
+{
+int i;
+
+for (i = 0; i < bbio->num_stripes; i++) {
+struct btrfs_device *device = bbio->stripes[i].dev;
+
+if (device->is_tgtdev_for_dev_replace)
+atomic_inc(>tgtdev_refs);
+}
+}


Can we just do this at the map time?  So when we add a new stripe we go
ahead and take the ref then, and the same at complete time?  Thanks,

Josef


Thanks for the review.

But I'm not quite sure what you mean here.

This pin_bbio_target_device() is called inside the dev_replace lock 
protection, so it's called at map time.


And we must call it after patching mirror inside __btrfs_map_block(), so 
I can't find a better timing to call it.


Would you please give me some hint?

Thanks,
Qu


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org

Re: corruption: yet another one after deleting a ro snapshot

2017-01-17 Thread Qu Wenruo



At 01/17/2017 06:39 PM, Christoph Anton Mitterer wrote:

Am 17. Januar 2017 09:53:19 MEZ schrieb Qu Wenruo :

Just a lowmem false alert, as the extent-tree dump shows a completely fine
result.

I'll CC you and add your reported-by tag when there is any update on
this case.


Fine, just one more thing left from my side on this issue:
Do you want me to leave the fs untouched until I could verify a lowmem mode fix?
Or is it ok to go on using it (and running backups on it)?

Cheers,
Chris.


Since we have your extent tree and root tree dump, I think we should be 
able to build an image to reproduce the case.


So you're OK to go on using it.
BTW, your fs is too large for us to really do some verification or other 
work.


Thanks,
Qu


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Unocorrectable errors with RAID1

2017-01-17 Thread Christoph Groth

Goldwyn Rodrigues wrote:

As Chris mentioned, try a later version. If you are familiar 
with git, you could even try the devel version.


Looking at the commits in current devel (2f4a73f9a612876116) since 
v4.9, there doesn't seem to be anything relevant, but I can retry, 
if you think it's worth it.


signature.asc
Description: PGP signature


Re: Unocorrectable errors with RAID1

2017-01-17 Thread Christoph Groth

Chris Murphy wrote:

On Tue, Jan 17, 2017 at 1:25 PM, Christoph Groth
 wrote:
Any ideas on what could be done?  If you need help to debug the 
problem with
btrfs-image, please tell me what I should do.  I can keep the 
broken file

system around until an image can be created at some later time.


Try 4.9, or even 4.8.5, tons of bugs have been fixed since 4.7.3
although I don't know off hand if this particular bug is 
fixed. I did
recently do a btrfs-image with btrfs-progs v4.9 with -s and did 
not

get a segfault.


I compiled btrfs-image.static from btrfs-tools 4.9 (from git) and 
started it from Debian testing's initramfs.  The exact command 
that I use is:


/mnt/btrfs-image.static -c3 -s /dev/sda2 /mnt/mim-s.bim

It runs for a couple of seconds (enough to write 20263936 bytes of 
output) and then quits with


*** Error in `/mnt/btrfs-image.static`: double free or corruption 
   (!prev): 0x009f0940 ***

== Backtrace: ==
[0x45fb97]
[0x465442]
[0x465c1e]
[0x402694]
[0x402dcb]
[0x4031fe]
[0x4050ff]
[0x405783]
[0x44cb73]
[0x44cdfe]
[0x400b2a]

(I had to type the above off the other screen, but I double 
checked that there are no errors.)


The executable that I used can be downloaded from 
http://groth.fr/btrfs-image.static

Its md5sum is 48abbc82ac6d3c0cb88cba1e5edb85fd.

I hope that this can help someone to see what's going on.


signature.asc
Description: PGP signature


Re: Unocorrectable errors with RAID1

2017-01-17 Thread Goldwyn Rodrigues


On 01/17/2017 02:25 PM, Christoph Groth wrote:
> Goldwyn Rodrigues wrote:
>> On 01/17/2017 02:44 AM, Christoph Groth wrote:
>>> Goldwyn Rodrigues wrote:
>>>
>>>> Would you be able to upload a btrfs-image for me to examine? This is a
>>>> core ctree error where most probably item size is incorrectly
>>>> registered.
>>>
>>> Sure, I can do that.  I'd like to use the -s option, will this be fine? 
>>
>> Yes, I think that should be fine.
> 
> Unfortunately, giving -s causes btrfs-image to segfault.  I tried both
> btrfs-progs 4.7.3 and 4.4.  I also tried different compression levels.
> 
> Without -s it works, but since this file system contains the complete
> digital life of our family, I would rather not share even the file names.
> 
> Any ideas on what could be done?  If you need help to debug the problem
> with btrfs-image, please tell me what I should do.  I can keep the
> broken file system around until an image can be created at some later time.

As Chris mentioned, try a later version. If you are familiar with git,
you could even try the devel version.

-- 
Goldwyn



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 2/2] btrfs: replace: Use ref counts to avoid destroying target device when canceled

2017-01-17 Thread Josef Bacik
On Mon, Jan 16, 2017 at 5:10 PM, Qu Wenruo  
wrote:
When dev-replace and scrub are run at the same time, dev-replace can 
be

canceled by scrub. It's quite common for btrfs/069.

In that case, the target device can be destroyed at cancel time,
leading to a use-after-free bug:

 Process A (dev-replace) | Process B(scrub)
--
 |(Any RW is OK)
 |scrub_setup_recheck_block()
 ||- btrfs_map_sblock()
 |   Got a bbio with tgtdev
btrfs_dev_replace_finishing()|
|- btrfs_destroy_dev_replace_tgtdev()|
   |- call_rcu(free_device)  |
  |- __free_device() |
 |- kfree(device)|
 | Scrub worker:
 | Access bbio->stripes[], which
 | contains tgtdev.
 | This triggers general 
protection.


The bug is most obvious for RAID5/6, since raid56 chooses to keep the old
rbio and rbio->bbio for later stealing; this hugely enlarges the race
window and makes it much easier to trigger the bug.

This patch introduces 'tgtdev_refs' and 'tgtdev_wait' for btrfs_device
to wait until all its users have released the target device.

Signed-off-by: Qu Wenruo 
---
 fs/btrfs/dev-replace.c |  7 ++-
 fs/btrfs/volumes.c | 36 +++-
 fs/btrfs/volumes.h | 10 ++
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5de280b9ad73..794a6a0bedf2 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -558,7 +558,6 @@ static int btrfs_dev_replace_finishing(struct 
btrfs_fs_info *fs_info,

  rcu_str_deref(src_device->name),
  src_device->devid,
  rcu_str_deref(tgt_device->name));
-   tgt_device->is_tgtdev_for_dev_replace = 0;
tgt_device->devid = src_device->devid;
src_device->devid = BTRFS_DEV_REPLACE_DEVID;
memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp));
@@ -579,6 +578,12 @@ static int btrfs_dev_replace_finishing(struct 
btrfs_fs_info *fs_info,


btrfs_dev_replace_unlock(dev_replace, 1);

+   /*
+* Only change is_tgtdev_for_dev_replace flag after all its
+* users get released.
+*/
+   wait_target_device(tgt_device);
+   tgt_device->is_tgtdev_for_dev_replace = 0;
btrfs_rm_dev_replace_blocked(fs_info);

btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bb8592e1a364..74a6ee981b78 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2064,6 +2064,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct 
btrfs_fs_info *fs_info,

WARN_ON(!tgtdev);
mutex_lock(_info->fs_devices->device_list_mutex);

+   wait_target_device(tgtdev);
btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);

if (tgtdev->bdev)
@@ -2598,6 +2599,8 @@ int btrfs_init_dev_replace_tgtdev(struct 
btrfs_fs_info *fs_info,

device->is_tgtdev_for_dev_replace = 1;
device->mode = FMODE_EXCL;
device->dev_stats_valid = 1;
+   atomic_set(>tgtdev_refs, 0);
+   init_waitqueue_head(>tgtdev_wait);
set_blocksize(device->bdev, 4096);
device->fs_devices = fs_info->fs_devices;
list_add(>dev_list, _info->fs_devices->devices);
@@ -2624,6 +2627,8 @@ void 
btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info 
*fs_info,

tgtdev->sector_size = sectorsize;
tgtdev->fs_info = fs_info;
tgtdev->in_fs_metadata = 1;
+   atomic_set(>tgtdev_refs, 0);
+   init_waitqueue_head(>tgtdev_wait);
 }

 static noinline int btrfs_update_device(struct btrfs_trans_handle 
*trans,
@@ -5302,6 +5307,32 @@ static struct btrfs_bio *alloc_btrfs_bio(int 
total_stripes, int real_stripes)

return bbio;
 }

+static void pin_bbio_target_device(struct btrfs_bio *bbio)
+{
+   int i;
+
+   for (i = 0; i < bbio->num_stripes; i++) {
+   struct btrfs_device *device = bbio->stripes[i].dev;
+
+   if (device->is_tgtdev_for_dev_replace)
+   atomic_inc(>tgtdev_refs);
+   }
+}


Can we just do this at the map time?  So when we add a new stripe we go 
ahead and take the ref then, and the same at complete time?  Thanks,


Josef

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 06/24] btrfs: Make btrfs_del_dir_entries_in_log take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/inode.c|  2 +-
 fs/btrfs/tree-log.c | 10 +-
 fs/btrfs/tree-log.h |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 41b1e2ed63b4..ebfeabafe1b1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4075,7 +4075,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle 
*trans,
}
 
ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
-  dir, index);
+  BTRFS_I(dir), index);
if (ret == -ENOENT)
ret = 0;
else if (ret)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index df822908f2be..caa8d886b4ae 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3084,7 +3084,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle 
*trans,
 int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 struct btrfs_root *root,
 const char *name, int name_len,
-struct inode *dir, u64 index)
+struct btrfs_inode *dir, u64 index)
 {
struct btrfs_root *log;
struct btrfs_dir_item *di;
@@ -3092,16 +3092,16 @@ int btrfs_del_dir_entries_in_log(struct 
btrfs_trans_handle *trans,
int ret;
int err = 0;
int bytes_del = 0;
-   u64 dir_ino = btrfs_ino(BTRFS_I(dir));
+   u64 dir_ino = btrfs_ino(dir);
 
-   if (BTRFS_I(dir)->logged_trans < trans->transid)
+   if (dir->logged_trans < trans->transid)
return 0;
 
ret = join_running_log_trans(root);
if (ret)
return 0;
 
-   mutex_lock(_I(dir)->log_mutex);
+   mutex_lock(>log_mutex);
 
log = root->log_root;
path = btrfs_alloc_path();
@@ -3176,7 +3176,7 @@ int btrfs_del_dir_entries_in_log(struct 
btrfs_trans_handle *trans,
 fail:
btrfs_free_path(path);
 out_unlock:
-   mutex_unlock(_I(dir)->log_mutex);
+   mutex_unlock(>log_mutex);
if (ret == -ENOSPC) {
btrfs_set_log_full_commit(root->fs_info, trans);
ret = 0;
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 2bcbac7efa9c..6c2b316b28e0 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -72,7 +72,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
 int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 struct btrfs_root *root,
 const char *name, int name_len,
-struct inode *dir, u64 index);
+struct btrfs_inode *dir, u64 index);
 int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
   const char *name, int name_len,
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 17/24] btrfs: Make log_new_dir_dentries take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 38cda7869bf9..b0cc56fe86e9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5155,7 +5155,7 @@ struct btrfs_dir_list {
  */
 static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
-   struct inode *start_inode,
+   struct btrfs_inode *start_inode,
struct btrfs_log_ctx *ctx)
 {
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -5174,7 +5174,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle 
*trans,
btrfs_free_path(path);
return -ENOMEM;
}
-   dir_elem->ino = btrfs_ino(BTRFS_I(start_inode));
+   dir_elem->ino = btrfs_ino(start_inode);
list_add_tail(_elem->list, _list);
 
while (!list_empty(_list)) {
@@ -5368,7 +5368,7 @@ static int btrfs_log_all_parents(struct 
btrfs_trans_handle *trans,
ret = 1;
if (!ret && ctx && ctx->log_new_dentries)
ret = log_new_dir_dentries(trans, root,
-  dir_inode, ctx);
+  BTRFS_I(dir_inode), 
ctx);
iput(dir_inode);
if (ret)
goto out;
@@ -5531,7 +5531,7 @@ static int btrfs_log_inode_parent(struct 
btrfs_trans_handle *trans,
old_parent = parent;
}
if (log_dentries)
-   ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+   ret = log_new_dir_dentries(trans, root, BTRFS_I(orig_inode), 
ctx);
else
ret = 0;
 end_trans:
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 22/24] btrfs: Make btrfs_log_inode take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 92 ++---
 1 file changed, 45 insertions(+), 47 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1348ab5e3229..8c110d0e16c3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -97,7 +97,7 @@
 #define LOG_WALK_REPLAY_ALL 3
 
 static int btrfs_log_inode(struct btrfs_trans_handle *trans,
-  struct btrfs_root *root, struct inode *inode,
+  struct btrfs_root *root, struct btrfs_inode *inode,
   int inode_only,
   const loff_t start,
   const loff_t end,
@@ -4589,7 +4589,7 @@ static int btrfs_check_ref_name_override(struct 
extent_buffer *eb,
  * This handles both files and directories.
  */
 static int btrfs_log_inode(struct btrfs_trans_handle *trans,
-  struct btrfs_root *root, struct inode *inode,
+  struct btrfs_root *root, struct btrfs_inode *inode,
   int inode_only,
   const loff_t start,
   const loff_t end,
@@ -4610,8 +4610,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
int ins_start_slot = 0;
int ins_nr;
bool fast_search = false;
-   u64 ino = btrfs_ino(BTRFS_I(inode));
-   struct extent_map_tree *em_tree = _I(inode)->extent_tree;
+   u64 ino = btrfs_ino(inode);
+   struct extent_map_tree *em_tree = >extent_tree;
u64 logged_isize = 0;
bool need_log_inode_item = true;
 
@@ -4632,10 +4632,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
 
 
/* today the code can only do partial logging of directories */
-   if (S_ISDIR(inode->i_mode) ||
+   if (S_ISDIR(inode->vfs_inode.i_mode) ||
(!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-  _I(inode)->runtime_flags) &&
-inode_only >= LOG_INODE_EXISTS))
+  >runtime_flags) &&
+inode_only == LOG_INODE_EXISTS))
max_key.type = BTRFS_XATTR_ITEM_KEY;
else
max_key.type = (u8)-1;
@@ -4647,11 +4647,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
 * order for the log replay code to mark inodes for link count
 * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
 */
-   if (S_ISDIR(inode->i_mode) ||
-   BTRFS_I(inode)->generation > fs_info->last_trans_committed)
-   ret = btrfs_commit_inode_delayed_items(trans, BTRFS_I(inode));
+   if (S_ISDIR(inode->vfs_inode.i_mode) ||
+   inode->generation > fs_info->last_trans_committed)
+   ret = btrfs_commit_inode_delayed_items(trans, inode);
else
-   ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
+   ret = btrfs_commit_inode_delayed_inode(inode);
 
if (ret) {
btrfs_free_path(path);
@@ -4661,17 +4661,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
 
if (inode_only == LOG_OTHER_INODE) {
inode_only = LOG_INODE_EXISTS;
-   mutex_lock_nested(_I(inode)->log_mutex,
- SINGLE_DEPTH_NESTING);
+   mutex_lock_nested(>log_mutex, SINGLE_DEPTH_NESTING);
} else {
-   mutex_lock(_I(inode)->log_mutex);
+   mutex_lock(>log_mutex);
}
 
/*
 * a brute force approach to making sure we get the most uptodate
 * copies of everything.
 */
-   if (S_ISDIR(inode->i_mode)) {
+   if (S_ISDIR(inode->vfs_inode.i_mode)) {
int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
 
if (inode_only == LOG_INODE_EXISTS)
@@ -4692,31 +4691,30 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
 * (zeroes), as if an expanding truncate happened,
 * instead of getting a file of 4Kb only.
 */
-   err = logged_inode_size(log, BTRFS_I(inode), path,
-   _isize);
+   err = logged_inode_size(log, inode, path, 
_isize);
if (err)
goto out_unlock;
}
if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-_I(inode)->runtime_flags)) {
+>runtime_flags)) {
if (inode_only == LOG_INODE_EXISTS) {
max_key.type = BTRFS_XATTR_ITEM_KEY;
ret = drop_objectid_items(trans, log, path, ino,
  max_key.type);
} else {

[PATCHv2 11/24] btrfs: Make btrfs_log_all_xattrs take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 12872bf492bd..1301c517c2f0 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4277,13 +4277,13 @@ static int logged_inode_size(struct btrfs_root *log, 
struct btrfs_inode *inode,
  */
 static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
-   struct inode *inode,
+   struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path)
 {
int ret;
struct btrfs_key key;
-   const u64 ino = btrfs_ino(BTRFS_I(inode));
+   const u64 ino = btrfs_ino(inode);
int ins_nr = 0;
int start_slot = 0;
 
@@ -4304,7 +4304,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle 
*trans,
if (ins_nr > 0) {
u64 last_extent = 0;
 
-   ret = copy_items(trans, BTRFS_I(inode), 
dst_path, path,
+   ret = copy_items(trans, inode, dst_path, path,
 _extent, start_slot,
 ins_nr, 1, 0);
/* can't be 1, extent items aren't processed */
@@ -4334,7 +4334,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle 
*trans,
if (ins_nr > 0) {
u64 last_extent = 0;
 
-   ret = copy_items(trans, BTRFS_I(inode), dst_path, path,
+   ret = copy_items(trans, inode, dst_path, path,
 _extent, start_slot,
 ins_nr, 1, 0);
/* can't be 1, extent items aren't processed */
@@ -4919,7 +4919,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
 
btrfs_release_path(path);
btrfs_release_path(dst_path);
-   err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+   err = btrfs_log_all_xattrs(trans, root, BTRFS_I(inode), path, dst_path);
if (err)
goto out_unlock;
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 04/24] btrfs: Make btrfs_inode_in_log take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/btrfs_inode.h | 16 +++-
 fs/btrfs/file.c|  2 +-
 fs/btrfs/inode.c   | 16 
 fs/btrfs/tree-log.c|  4 ++--
 4 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4fed080545c6..b2dde0efebc0 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -255,16 +255,14 @@ static inline bool btrfs_is_free_space_inode(struct inode 
*inode)
return false;
 }
 
-static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
+static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
 {
int ret = 0;
 
-   spin_lock(_I(inode)->lock);
-   if (BTRFS_I(inode)->logged_trans == generation &&
-   BTRFS_I(inode)->last_sub_trans <=
-   BTRFS_I(inode)->last_log_commit &&
-   BTRFS_I(inode)->last_sub_trans <=
-   BTRFS_I(inode)->root->last_log_commit) {
+   spin_lock(>lock);
+   if (inode->logged_trans == generation &&
+   inode->last_sub_trans <= inode->last_log_commit &&
+   inode->last_sub_trans <= inode->root->last_log_commit) {
/*
 * After a ranged fsync we might have left some extent maps
 * (that fall outside the fsync's range). So return false
@@ -272,10 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, 
u64 generation)
 * will be called and process those extent maps.
 */
smp_mb();
-   if (list_empty(_I(inode)->extent_tree.modified_extents))
+   if (list_empty(>extent_tree.modified_extents))
ret = 1;
}
-   spin_unlock(_I(inode)->lock);
+   spin_unlock(>lock);
return ret;
 }
 
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0d32f45cef28..149b79b3aaf8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2062,7 +2062,7 @@ int btrfs_sync_file(struct file *file, loff_t start, 
loff_t end, int datasync)
 * commit does not start nor waits for ordered extents to complete.
 */
smp_mb();
-   if (btrfs_inode_in_log(inode, fs_info->generation) ||
+   if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
(full_sync && BTRFS_I(inode)->last_trans <=
 fs_info->last_trans_committed) ||
(!btrfs_have_ordered_extents_in_range(inode, start, len) &&
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a8374f1d8c61..9442c80fe551 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9683,11 +9683,11 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 * allow the tasks to sync it.
 */
if (ret && (root_log_pinned || dest_log_pinned)) {
-   if (btrfs_inode_in_log(old_dir, fs_info->generation) ||
-   btrfs_inode_in_log(new_dir, fs_info->generation) ||
-   btrfs_inode_in_log(old_inode, fs_info->generation) ||
+   if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
+   btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
+   btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) 
||
(new_inode &&
-btrfs_inode_in_log(new_inode, fs_info->generation)))
+btrfs_inode_in_log(BTRFS_I(new_inode), 
fs_info->generation)))
btrfs_set_log_full_commit(fs_info, trans);
 
if (root_log_pinned) {
@@ -9959,11 +9959,11 @@ static int btrfs_rename(struct inode *old_dir, struct 
dentry *old_dentry,
 * allow the tasks to sync it.
 */
if (ret && log_pinned) {
-   if (btrfs_inode_in_log(old_dir, fs_info->generation) ||
-   btrfs_inode_in_log(new_dir, fs_info->generation) ||
-   btrfs_inode_in_log(old_inode, fs_info->generation) ||
+   if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
+   btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
+   btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) 
||
(new_inode &&
-btrfs_inode_in_log(new_inode, fs_info->generation)))
+btrfs_inode_in_log(BTRFS_I(new_inode), 
fs_info->generation)))
btrfs_set_log_full_commit(fs_info, trans);
 
btrfs_end_log_trans(root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 581d31171683..37adad5dabd6 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5237,7 +5237,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle 
*trans,
goto next_dir_inode;
}
 
-   if (btrfs_inode_in_log(di_inode, trans->transid)) {
+   if 

[PATCHv2 20/24] btrfs: Make __add_inode_ref take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 35434d686653..d919cd4252ba 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -991,7 +991,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle 
*trans,
  struct btrfs_root *root,
  struct btrfs_path *path,
  struct btrfs_root *log_root,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir, struct btrfs_inode 
*inode,
  struct extent_buffer *eb,
  u64 inode_objectid, u64 parent_objectid,
  u64 ref_index, char *name, int namelen,
@@ -1047,12 +1047,11 @@ static inline int __add_inode_ref(struct 
btrfs_trans_handle *trans,
parent_objectid,
victim_name,
victim_name_len)) {
-   inc_nlink(inode);
+   inc_nlink(>vfs_inode);
btrfs_release_path(path);
 
-   ret = btrfs_unlink_inode(trans, root, 
BTRFS_I(dir),
-BTRFS_I(inode), 
victim_name,
-victim_name_len);
+   ret = btrfs_unlink_inode(trans, root, dir, 
inode, 
+   victim_name, victim_name_len);
kfree(victim_name);
if (ret)
return ret;
@@ -1114,15 +1113,14 @@ static inline int __add_inode_ref(struct 
btrfs_trans_handle *trans,
parent_objectid, victim_name,
victim_name_len)) {
ret = -ENOENT;
-   victim_parent = read_one_inode(root,
-  parent_objectid);
+   victim_parent = read_one_inode(root, 
parent_objectid);
if (victim_parent) {
-   inc_nlink(inode);
+   inc_nlink(>vfs_inode);
btrfs_release_path(path);
 
ret = btrfs_unlink_inode(trans, root,
 
BTRFS_I(victim_parent),
-BTRFS_I(inode),
+inode,
 victim_name,
 
victim_name_len);
if (!ret)
@@ -1148,20 +1146,20 @@ static inline int __add_inode_ref(struct 
btrfs_trans_handle *trans,
btrfs_release_path(path);
 
/* look for a conflicting sequence number */
-   di = btrfs_lookup_dir_index_item(trans, root, path, 
btrfs_ino(BTRFS_I(dir)),
+   di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
 ref_index, name, namelen, 0);
if (di && !IS_ERR(di)) {
-   ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), di);
+   ret = drop_one_dir_item(trans, root, path, dir, di);
if (ret)
return ret;
}
btrfs_release_path(path);
 
/* look for a conflicing name */
-   di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(BTRFS_I(dir)),
+   di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
   name, namelen, 0);
if (di && !IS_ERR(di)) {
-   ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), di);
+   ret = drop_one_dir_item(trans, root, path, dir, di);
if (ret)
return ret;
}
@@ -1307,7 +1305,7 @@ static noinline int add_inode_ref(struct 
btrfs_trans_handle *trans,
 
if (!search_done) {
ret = __add_inode_ref(trans, root, path, log,
- dir, inode, eb,
+ BTRFS_I(dir), 
BTRFS_I(inode), eb,
  inode_objectid,
  parent_objectid,
   

[PATCHv2 05/24] btrfs: Make btrfs_log_new_name take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/inode.c|  8 
 fs/btrfs/tree-log.c | 18 --
 fs/btrfs/tree-log.h |  2 +-
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9442c80fe551..41b1e2ed63b4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6600,7 +6600,7 @@ static int btrfs_link(struct dentry *old_dentry, struct 
inode *dir,
goto fail;
}
d_instantiate(dentry, inode);
-   btrfs_log_new_name(trans, inode, NULL, parent);
+   btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
}
 
btrfs_balance_delayed_items(fs_info);
@@ -9660,13 +9660,13 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 
if (root_log_pinned) {
parent = new_dentry->d_parent;
-   btrfs_log_new_name(trans, old_inode, old_dir, parent);
+   btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), 
parent);
btrfs_end_log_trans(root);
root_log_pinned = false;
}
if (dest_log_pinned) {
parent = old_dentry->d_parent;
-   btrfs_log_new_name(trans, new_inode, new_dir, parent);
+   btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir), 
parent);
btrfs_end_log_trans(dest);
dest_log_pinned = false;
}
@@ -9932,7 +9932,7 @@ static int btrfs_rename(struct inode *old_dir, struct 
dentry *old_dentry,
if (log_pinned) {
struct dentry *parent = new_dentry->d_parent;
 
-   btrfs_log_new_name(trans, old_inode, old_dir, parent);
+   btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), 
parent);
btrfs_end_log_trans(root);
log_pinned = false;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 37adad5dabd6..df822908f2be 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5809,30 +5809,28 @@ void btrfs_record_snapshot_destroy(struct 
btrfs_trans_handle *trans,
  * full transaction commit is required.
  */
 int btrfs_log_new_name(struct btrfs_trans_handle *trans,
-   struct inode *inode, struct inode *old_dir,
+   struct btrfs_inode *inode, struct btrfs_inode *old_dir,
struct dentry *parent)
 {
-   struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-   struct btrfs_root * root = BTRFS_I(inode)->root;
+   struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+   struct btrfs_root * root = inode->root;
 
/*
 * this will force the logging code to walk the dentry chain
 * up for the file
 */
-   if (S_ISREG(inode->i_mode))
-   BTRFS_I(inode)->last_unlink_trans = trans->transid;
+   if (S_ISREG(inode->vfs_inode.i_mode))
+   inode->last_unlink_trans = trans->transid;
 
/*
 * if this inode hasn't been logged and directory we're renaming it
 * from hasn't been logged, we don't need to log it
 */
-   if (BTRFS_I(inode)->logged_trans <=
-   fs_info->last_trans_committed &&
-   (!old_dir || BTRFS_I(old_dir)->logged_trans <=
-   fs_info->last_trans_committed))
+   if (inode->logged_trans <= fs_info->last_trans_committed &&
+   (!old_dir || old_dir->logged_trans <= 
fs_info->last_trans_committed))
return 0;
 
-   return btrfs_log_inode_parent(trans, root, inode, parent, 0,
+   return btrfs_log_inode_parent(trans, root, &inode->vfs_inode, parent, 0,
  LLONG_MAX, 1, NULL);
 }
 
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index e08ce78b2ad4..2bcbac7efa9c 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -85,6 +85,6 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
 void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
   struct btrfs_inode *dir);
 int btrfs_log_new_name(struct btrfs_trans_handle *trans,
-   struct inode *inode, struct inode *old_dir,
+   struct btrfs_inode *inode, struct btrfs_inode *old_dir,
struct dentry *parent);
 #endif
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 24/24] btrfs: Make count_inode_refs take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 47e4f3610348..a16da4a3ab63 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1402,7 +1402,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
 }
 
 static int count_inode_refs(struct btrfs_root *root,
-  struct inode *inode, struct btrfs_path *path)
+  struct btrfs_inode *inode, struct btrfs_path 
*path)
 {
int ret;
struct btrfs_key key;
@@ -1410,7 +1410,7 @@ static int count_inode_refs(struct btrfs_root *root,
unsigned long ptr;
unsigned long ptr_end;
int name_len;
-   u64 ino = btrfs_ino(BTRFS_I(inode));
+   u64 ino = btrfs_ino(inode);
 
key.objectid = ino;
key.type = BTRFS_INODE_REF_KEY;
@@ -1481,7 +1481,7 @@ static noinline int fixup_inode_link_count(struct 
btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
 
-   ret = count_inode_refs(root, inode, path);
+   ret = count_inode_refs(root, BTRFS_I(inode), path);
if (ret < 0)
goto out;
 
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 21/24] btrfs: Make log_inode_item take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d919cd4252ba..1348ab5e3229 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3592,19 +3592,18 @@ static void fill_inode_item(struct btrfs_trans_handle 
*trans,
 
 static int log_inode_item(struct btrfs_trans_handle *trans,
  struct btrfs_root *log, struct btrfs_path *path,
- struct inode *inode)
+ struct btrfs_inode *inode)
 {
struct btrfs_inode_item *inode_item;
int ret;
 
ret = btrfs_insert_empty_item(trans, log, path,
- &BTRFS_I(inode)->location,
- sizeof(*inode_item));
+ &inode->location, sizeof(*inode_item));
if (ret && ret != -EEXIST)
return ret;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
-   fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0);
+   fill_inode_item(trans, path->nodes[0], inode_item, &inode->vfs_inode, 
0, 0);
btrfs_release_path(path);
return 0;
 }
@@ -4930,7 +4929,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
btrfs_release_path(path);
btrfs_release_path(dst_path);
if (need_log_inode_item) {
-   err = log_inode_item(trans, log, dst_path, inode);
+   err = log_inode_item(trans, log, dst_path, BTRFS_I(inode));
if (err)
goto out_unlock;
}
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 07/24] btrfs: Make btrfs_del_inode_ref take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/inode.c|  2 +-
 fs/btrfs/tree-log.c | 10 +++++-----
 fs/btrfs/tree-log.h |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ebfeabafe1b1..e86b08eabb82 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4068,7 +4068,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle 
*trans,
}
 
ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
-inode, dir_ino);
+BTRFS_I(inode), dir_ino);
if (ret != 0 && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
goto err;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index caa8d886b4ae..a7705173150e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3192,25 +3192,25 @@ int btrfs_del_dir_entries_in_log(struct 
btrfs_trans_handle *trans,
 int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
   const char *name, int name_len,
-  struct inode *inode, u64 dirid)
+  struct btrfs_inode *inode, u64 dirid)
 {
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *log;
u64 index;
int ret;
 
-   if (BTRFS_I(inode)->logged_trans < trans->transid)
+   if (inode->logged_trans < trans->transid)
return 0;
 
ret = join_running_log_trans(root);
if (ret)
return 0;
log = root->log_root;
-   mutex_lock(&BTRFS_I(inode)->log_mutex);
+   mutex_lock(&inode->log_mutex);
 
-   ret = btrfs_del_inode_ref(trans, log, name, name_len, 
btrfs_ino(BTRFS_I(inode)),
+   ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
  dirid, &index);
-   mutex_unlock(&BTRFS_I(inode)->log_mutex);
+   mutex_unlock(&inode->log_mutex);
if (ret == -ENOSPC) {
btrfs_set_log_full_commit(fs_info, trans);
ret = 0;
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 6c2b316b28e0..bc50f128c6be 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -76,7 +76,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle 
*trans,
 int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
   const char *name, int name_len,
-  struct inode *inode, u64 dirid);
+  struct btrfs_inode *inode, u64 dirid);
 void btrfs_end_log_trans(struct btrfs_root *root);
 int btrfs_pin_log_trans(struct btrfs_root *root);
 void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 16/24] btrfs: Make log_directory_changes take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 8d7197a0eceb..38cda7869bf9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3450,7 +3450,7 @@ static noinline int log_dir_items(struct 
btrfs_trans_handle *trans,
  * key logged by this transaction.
  */
 static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
  struct btrfs_path *path,
  struct btrfs_path *dst_path,
  struct btrfs_log_ctx *ctx)
@@ -3464,9 +3464,8 @@ static noinline int log_directory_changes(struct 
btrfs_trans_handle *trans,
min_key = 0;
max_key = 0;
while (1) {
-   ret = log_dir_items(trans, root, BTRFS_I(inode), path,
-   dst_path, key_type, ctx, min_key,
-   &max_key);
+   ret = log_dir_items(trans, root, inode, path, dst_path, 
key_type, 
+   ctx, min_key, &max_key);
if (ret)
return ret;
if (max_key == (u64)-1)
@@ -4977,7 +4976,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
}
 
if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
-   ret = log_directory_changes(trans, root, inode, path, dst_path,
+   ret = log_directory_changes(trans, root, BTRFS_I(inode), path, 
dst_path,
ctx);
if (ret) {
err = ret;
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 19/24] btrfs: Make drop_one_dir_item take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index b2c0a30811f6..35434d686653 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -843,7 +843,7 @@ static noinline int replay_one_extent(struct 
btrfs_trans_handle *trans,
 static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
  struct btrfs_root *root,
  struct btrfs_path *path,
- struct inode *dir,
+ struct btrfs_inode *dir,
  struct btrfs_dir_item *di)
 {
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -875,7 +875,7 @@ static noinline int drop_one_dir_item(struct 
btrfs_trans_handle *trans,
if (ret)
goto out;
 
-   ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), BTRFS_I(inode), 
name, name_len);
+   ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name, 
name_len);
if (ret)
goto out;
else
@@ -1151,7 +1151,7 @@ static inline int __add_inode_ref(struct 
btrfs_trans_handle *trans,
di = btrfs_lookup_dir_index_item(trans, root, path, 
btrfs_ino(BTRFS_I(dir)),
 ref_index, name, namelen, 0);
if (di && !IS_ERR(di)) {
-   ret = drop_one_dir_item(trans, root, path, dir, di);
+   ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), di);
if (ret)
return ret;
}
@@ -1161,7 +1161,7 @@ static inline int __add_inode_ref(struct 
btrfs_trans_handle *trans,
di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(BTRFS_I(dir)),
   name, namelen, 0);
if (di && !IS_ERR(di)) {
-   ret = drop_one_dir_item(trans, root, path, dir, di);
+   ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), di);
if (ret)
return ret;
}
@@ -1769,7 +1769,7 @@ static noinline int replay_one_name(struct 
btrfs_trans_handle *trans,
if (!exists)
goto out;
 
-   ret = drop_one_dir_item(trans, root, path, dir, dst_di);
+   ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), dst_di);
if (ret)
goto out;
 
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 01/24] btrfs: Make btrfs_must_commit_transaction take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index b814cd7bbe70..a2a822a993af 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5021,13 +5021,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
  * we logged the inode or it might have also done the unlink).
  */
 static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
- struct inode *inode)
+ struct btrfs_inode *inode)
 {
-   struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+   struct btrfs_fs_info *fs_info = inode->root->fs_info;
bool ret = false;
 
-   mutex_lock(&BTRFS_I(inode)->log_mutex);
-   if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) {
+   mutex_lock(&inode->log_mutex);
+   if (inode->last_unlink_trans > fs_info->last_trans_committed) {
/*
 * Make sure any commits to the log are forced to be full
 * commits.
@@ -5035,7 +5035,7 @@ static bool btrfs_must_commit_transaction(struct 
btrfs_trans_handle *trans,
btrfs_set_log_full_commit(fs_info, trans);
ret = true;
}
-   mutex_unlock(&BTRFS_I(inode)->log_mutex);
+   mutex_unlock(&inode->log_mutex);
 
return ret;
 }
@@ -5084,7 +5084,7 @@ static noinline int check_parent_dirs_for_sync(struct 
btrfs_trans_handle *trans,
BTRFS_I(inode)->logged_trans = trans->transid;
smp_mb();
 
-   if (btrfs_must_commit_transaction(trans, inode)) {
+   if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) {
ret = 1;
break;
}
@@ -5094,7 +5094,7 @@ static noinline int check_parent_dirs_for_sync(struct 
btrfs_trans_handle *trans,
 
if (IS_ROOT(parent)) {
inode = d_inode(parent);
-   if (btrfs_must_commit_transaction(trans, inode))
+   if (btrfs_must_commit_transaction(trans, 
BTRFS_I(inode)))
ret = 1;
break;
}
@@ -5248,7 +5248,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle 
*trans,
ret = btrfs_log_inode(trans, root, di_inode,
  log_mode, 0, LLONG_MAX, ctx);
if (!ret &&
-   btrfs_must_commit_transaction(trans, di_inode))
+   btrfs_must_commit_transaction(trans, 
BTRFS_I(di_inode)))
ret = 1;
iput(di_inode);
if (ret)
@@ -5368,7 +5368,7 @@ static int btrfs_log_all_parents(struct 
btrfs_trans_handle *trans,
ret = btrfs_log_inode(trans, root, dir_inode,
  LOG_INODE_ALL, 0, LLONG_MAX, ctx);
if (!ret &&
-   btrfs_must_commit_transaction(trans, dir_inode))
+   btrfs_must_commit_transaction(trans, 
BTRFS_I(dir_inode)))
ret = 1;
if (!ret && ctx && ctx->log_new_dentries)
ret = log_new_dir_dentries(trans, root,
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 09/24] btrfs: Make btrfs_check_ref_name_override take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 20718cfebf89..7669e95be423 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4495,7 +4495,7 @@ static int btrfs_log_trailing_hole(struct 
btrfs_trans_handle *trans,
 static int btrfs_check_ref_name_override(struct extent_buffer *eb,
 const int slot,
 const struct btrfs_key *key,
-struct inode *inode,
+struct btrfs_inode *inode,
 u64 *other_ino)
 {
int ret;
@@ -4551,9 +4551,8 @@ static int btrfs_check_ref_name_override(struct 
extent_buffer *eb,
}
 
read_extent_buffer(eb, name, name_ptr, this_name_len);
-   di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
-  search_path, parent,
-  name, this_name_len, 0);
+   di = btrfs_lookup_dir_item(NULL, inode->root, search_path,
+   parent, name, this_name_len, 0);
if (di && !IS_ERR(di)) {
struct btrfs_key di_key;
 
@@ -4769,7 +4768,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
 
ret = btrfs_check_ref_name_override(path->nodes[0],
path->slots[0],
-   &min_key, inode,
+   &min_key, 
BTRFS_I(inode),
&other_ino);
if (ret < 0) {
err = ret;
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 02/24] btrfs: Make btrfs_record_unlink_dir take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/inode.c|  8 ++++----
 fs/btrfs/tree-log.c | 18 +++++++++---------
 fs/btrfs/tree-log.h |  2 +-
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d49c3b78e2fb..a8374f1d8c61 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4142,7 +4142,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry 
*dentry)
if (IS_ERR(trans))
return PTR_ERR(trans);
 
-   btrfs_record_unlink_dir(trans, dir, d_inode(dentry), 0);
+   btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), 
0);
 
ret = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
 dentry->d_name.name, dentry->d_name.len);
@@ -9593,8 +9593,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_inode->i_ctime = ctime;
 
if (old_dentry->d_parent != new_dentry->d_parent) {
-   btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
-   btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
+   btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), 
BTRFS_I(old_inode), 1);
+   btrfs_record_unlink_dir(trans, BTRFS_I(new_dir), 
BTRFS_I(new_inode), 1);
}
 
/* src is a subvolume */
@@ -9873,7 +9873,7 @@ static int btrfs_rename(struct inode *old_dir, struct 
dentry *old_dentry,
old_inode->i_ctime = current_time(old_dir);
 
if (old_dentry->d_parent != new_dentry->d_parent)
-   btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
+   btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), 
BTRFS_I(old_inode), 1);
 
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a2a822a993af..6f9a3beb7050 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5730,7 +5730,7 @@ int btrfs_recover_log_trees(struct btrfs_root 
*log_root_tree)
  * inodes, etc) are done.
  */
 void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
-struct inode *dir, struct inode *inode,
+struct btrfs_inode *dir, struct btrfs_inode *inode,
 int for_rename)
 {
/*
@@ -5743,23 +5743,23 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle 
*trans,
 * into the file.  When the file is logged we check it and
 * don't log the parents if the file is fully on disk.
 */
-   mutex_lock(&BTRFS_I(inode)->log_mutex);
-   BTRFS_I(inode)->last_unlink_trans = trans->transid;
-   mutex_unlock(&BTRFS_I(inode)->log_mutex);
+   mutex_lock(&inode->log_mutex);
+   inode->last_unlink_trans = trans->transid;
+   mutex_unlock(&inode->log_mutex);
 
/*
 * if this directory was already logged any new
 * names for this file/dir will get recorded
 */
smp_mb();
-   if (BTRFS_I(dir)->logged_trans == trans->transid)
+   if (dir->logged_trans == trans->transid)
return;
 
/*
 * if the inode we're about to unlink was logged,
 * the log will be properly updated for any new names
 */
-   if (BTRFS_I(inode)->logged_trans == trans->transid)
+   if (inode->logged_trans == trans->transid)
return;
 
/*
@@ -5776,9 +5776,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle 
*trans,
return;
 
 record:
-   mutex_lock(&BTRFS_I(dir)->log_mutex);
-   BTRFS_I(dir)->last_unlink_trans = trans->transid;
-   mutex_unlock(&BTRFS_I(dir)->log_mutex);
+   mutex_lock(&dir->log_mutex);
+   dir->last_unlink_trans = trans->transid;
+   mutex_unlock(&dir->log_mutex);
 }
 
 /*
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index ab858e31ccbc..69702eef9603 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -80,7 +80,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle 
*trans,
 void btrfs_end_log_trans(struct btrfs_root *root);
 int btrfs_pin_log_trans(struct btrfs_root *root);
 void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
-struct inode *dir, struct inode *inode,
+struct btrfs_inode *dir, struct btrfs_inode *inode,
 int for_rename);
 void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
   struct inode *dir);
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 10/24] btrfs: Make copy_items take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7669e95be423..12872bf492bd 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3613,16 +3613,16 @@ static int log_inode_item(struct btrfs_trans_handle 
*trans,
 }
 
 static noinline int copy_items(struct btrfs_trans_handle *trans,
-  struct inode *inode,
+  struct btrfs_inode *inode,
   struct btrfs_path *dst_path,
   struct btrfs_path *src_path, u64 *last_extent,
   int start_slot, int nr, int inode_only,
   u64 logged_isize)
 {
-   struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+   struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
unsigned long src_offset;
unsigned long dst_offset;
-   struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
+   struct btrfs_root *log = inode->root->log_root;
struct btrfs_file_extent_item *extent;
struct btrfs_inode_item *inode_item;
struct extent_buffer *src = src_path->nodes[0];
@@ -3633,7 +3633,7 @@ static noinline int copy_items(struct btrfs_trans_handle 
*trans,
char *ins_data;
int i;
struct list_head ordered_sums;
-   int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+   int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
bool has_extents = false;
bool need_find_last_extent = true;
bool done = false;
@@ -3675,7 +3675,7 @@ static noinline int copy_items(struct btrfs_trans_handle 
*trans,
dst_path->slots[0],
struct btrfs_inode_item);
fill_inode_item(trans, dst_path->nodes[0], inode_item,
-   inode, inode_only == LOG_INODE_EXISTS,
+   >vfs_inode, inode_only == 
LOG_INODE_EXISTS,
logged_isize);
} else {
copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
@@ -3783,7 +3783,7 @@ static noinline int copy_items(struct btrfs_trans_handle 
*trans,
if (need_find_last_extent) {
u64 len;
 
-   ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path);
+   ret = btrfs_prev_leaf(inode->root, src_path);
if (ret < 0)
return ret;
if (ret)
@@ -3792,7 +3792,7 @@ static noinline int copy_items(struct btrfs_trans_handle 
*trans,
src_path->slots[0]--;
src = src_path->nodes[0];
btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
-   if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
+   if (key.objectid != btrfs_ino(inode) ||
key.type != BTRFS_EXTENT_DATA_KEY)
goto fill_holes;
extent = btrfs_item_ptr(src, src_path->slots[0],
@@ -3825,8 +3825,7 @@ static noinline int copy_items(struct btrfs_trans_handle 
*trans,
if (need_find_last_extent) {
/* btrfs_prev_leaf could return 1 without releasing the path */
btrfs_release_path(src_path);
-   ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key,
-   src_path, 0, 0);
+   ret = btrfs_search_slot(NULL, inode->root, &first_key, 
src_path, 0, 0);
if (ret < 0)
return ret;
ASSERT(ret == 0);
@@ -3846,7 +3845,7 @@ static noinline int copy_items(struct btrfs_trans_handle 
*trans,
u64 extent_end;
 
if (i >= btrfs_header_nritems(src_path->nodes[0])) {
-   ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path);
+   ret = btrfs_next_leaf(inode->root, src_path);
if (ret < 0)
return ret;
ASSERT(ret == 0);
@@ -3857,7 +3856,7 @@ static noinline int copy_items(struct btrfs_trans_handle 
*trans,
btrfs_item_key_to_cpu(src, &key, i);
if (!btrfs_comp_cpu_keys(&key, &last_key))
done = true;
-   if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
+   if (key.objectid != btrfs_ino(inode) ||
key.type != BTRFS_EXTENT_DATA_KEY) {
i++;
continue;
@@ -3880,9 +3879,8 @@ static noinline int copy_items(struct btrfs_trans_handle 
*trans,
}
offset = *last_extent;
len = key.offset - *last_extent;
-   ret = 

[PATCHv2 15/24] btrfs: Make log_dir_items take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e293ae0e18d7..8d7197a0eceb 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3260,7 +3260,7 @@ static noinline int insert_dir_log_key(struct 
btrfs_trans_handle *trans,
  * to replay anything deleted before the fsync
  */
 static noinline int log_dir_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
  struct btrfs_path *path,
  struct btrfs_path *dst_path, int key_type,
  struct btrfs_log_ctx *ctx,
@@ -3275,7 +3275,7 @@ static noinline int log_dir_items(struct 
btrfs_trans_handle *trans,
int nritems;
u64 first_offset = min_offset;
u64 last_offset = (u64)-1;
-   u64 ino = btrfs_ino(BTRFS_I(inode));
+   u64 ino = btrfs_ino(inode);
 
log = root->log_root;
 
@@ -3464,7 +3464,7 @@ static noinline int log_directory_changes(struct 
btrfs_trans_handle *trans,
min_key = 0;
max_key = 0;
while (1) {
-   ret = log_dir_items(trans, root, inode, path,
+   ret = log_dir_items(trans, root, BTRFS_I(inode), path,
dst_path, key_type, ctx, min_key,
_key);
if (ret)
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 14/24] btrfs: Make btrfs_log_changed_extents take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 0e061f91055e..e293ae0e18d7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4053,7 +4053,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle 
*trans,
 }
 
 static int log_one_extent(struct btrfs_trans_handle *trans,
- struct inode *inode, struct btrfs_root *root,
+ struct btrfs_inode *inode, struct btrfs_root *root,
  const struct extent_map *em,
  struct btrfs_path *path,
  const struct list_head *logged_list,
@@ -4070,7 +4070,7 @@ static int log_one_extent(struct btrfs_trans_handle 
*trans,
int extent_inserted = 0;
bool ordered_io_err = false;
 
-   ret = wait_ordered_extents(trans, inode, root, em, logged_list,
+   ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em, 
logged_list,
   &ordered_io_err);
if (ret)
return ret;
@@ -4082,14 +4082,14 @@ static int log_one_extent(struct btrfs_trans_handle 
*trans,
 
btrfs_init_map_token(&token);
 
-   ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
+   ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, 
em->start,
   em->start + em->len, NULL, 0, 1,
   sizeof(*fi), &extent_inserted);
if (ret)
return ret;
 
if (!extent_inserted) {
-   key.objectid = btrfs_ino(BTRFS_I(inode));
+   key.objectid = btrfs_ino(inode);
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = em->start;
 
@@ -4148,7 +4148,7 @@ static int log_one_extent(struct btrfs_trans_handle 
*trans,
 
 static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 struct btrfs_root *root,
-struct inode *inode,
+struct btrfs_inode *inode,
 struct btrfs_path *path,
 struct list_head *logged_list,
 struct btrfs_log_ctx *ctx,
@@ -4157,14 +4157,14 @@ static int btrfs_log_changed_extents(struct 
btrfs_trans_handle *trans,
 {
struct extent_map *em, *n;
struct list_head extents;
-   struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
+   struct extent_map_tree *tree = &inode->extent_tree;
u64 test_gen;
int ret = 0;
int num = 0;
 
INIT_LIST_HEAD(&extents);
 
-   down_write(&BTRFS_I(inode)->dio_sem);
+   down_write(&inode->dio_sem);
write_lock(&tree->lock);
test_gen = root->fs_info->last_trans_committed;
 
@@ -4193,7 +4193,7 @@ static int btrfs_log_changed_extents(struct 
btrfs_trans_handle *trans,
}
 
list_sort(NULL, &extents, extent_cmp);
-   btrfs_get_logged_extents(BTRFS_I(inode), logged_list, start, end);
+   btrfs_get_logged_extents(inode, logged_list, start, end);
/*
 * Some ordered extents started by fsync might have completed
 * before we could collect them into the list logged_list, which
@@ -4204,7 +4204,7 @@ static int btrfs_log_changed_extents(struct 
btrfs_trans_handle *trans,
 * without writing to the log tree and the fsync must report the
 * file data write error and not commit the current transaction.
 */
-   ret = filemap_check_errors(inode->i_mapping);
+   ret = filemap_check_errors(inode->vfs_inode.i_mapping);
if (ret)
ctx->io_err = ret;
 process:
@@ -4233,7 +4233,7 @@ static int btrfs_log_changed_extents(struct 
btrfs_trans_handle *trans,
}
WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock);
-   up_write(&BTRFS_I(inode)->dio_sem);
+   up_write(&inode->dio_sem);
 
btrfs_release_path(path);
return ret;
@@ -4938,7 +4938,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
goto out_unlock;
}
if (fast_search) {
-   ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
+   ret = btrfs_log_changed_extents(trans, root, BTRFS_I(inode), 
dst_path,
&logged_list, ctx, start, end);
if (ret) {
err = ret;
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 13/24] btrfs: Make btrfs_get_logged_extents take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/ordered-data.c | 4 ++--
 fs/btrfs/ordered-data.h | 2 +-
 fs/btrfs/tree-log.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 041c3326d109..7ae350a64c77 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -432,7 +432,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
 }
 
 /* Needs to either be called under a log transaction or the log_mutex */
-void btrfs_get_logged_extents(struct inode *inode,
+void btrfs_get_logged_extents(struct btrfs_inode *inode,
  struct list_head *logged_list,
  const loff_t start,
  const loff_t end)
@@ -442,7 +442,7 @@ void btrfs_get_logged_extents(struct inode *inode,
struct rb_node *n;
struct rb_node *prev;
 
-   tree = &BTRFS_I(inode)->ordered_tree;
+   tree = &inode->ordered_tree;
spin_lock_irq(&tree->lock);
n = __tree_search(&tree->tree, end, &prev);
if (!n)
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 5f2b0ca28705..b02b71d41d83 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -201,7 +201,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int 
nr,
   const u64 range_start, const u64 range_len);
 int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
  const u64 range_start, const u64 range_len);
-void btrfs_get_logged_extents(struct inode *inode,
+void btrfs_get_logged_extents(struct btrfs_inode *inode,
  struct list_head *logged_list,
  const loff_t start,
  const loff_t end);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9f2c42016825..0e061f91055e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4193,7 +4193,7 @@ static int btrfs_log_changed_extents(struct 
btrfs_trans_handle *trans,
}
 
list_sort(NULL, , extent_cmp);
-   btrfs_get_logged_extents(inode, logged_list, start, end);
+   btrfs_get_logged_extents(BTRFS_I(inode), logged_list, start, end);
/*
 * Some ordered extents started by fsync might have completed
 * before we could collect them into the list logged_list, which
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 08/24] btrfs: Make logged_inode_size take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a7705173150e..20718cfebf89 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4241,13 +4241,13 @@ static int btrfs_log_changed_extents(struct 
btrfs_trans_handle *trans,
return ret;
 }
 
-static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
+static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
 struct btrfs_path *path, u64 *size_ret)
 {
struct btrfs_key key;
int ret;
 
-   key.objectid = btrfs_ino(BTRFS_I(inode));
+   key.objectid = btrfs_ino(inode);
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
 
@@ -4699,7 +4699,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
 * (zeroes), as if an expanding truncate happened,
 * instead of getting a file of 4Kb only.
 */
-   err = logged_inode_size(log, inode, path,
+   err = logged_inode_size(log, BTRFS_I(inode), path,
_isize);
if (err)
goto out_unlock;
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 23/24] btrfs: Make count_inode_extrefs take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 8c110d0e16c3..47e4f3610348 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1358,14 +1358,14 @@ static int insert_orphan_item(struct btrfs_trans_handle 
*trans,
 }
 
 static int count_inode_extrefs(struct btrfs_root *root,
-  struct inode *inode, struct btrfs_path *path)
+  struct btrfs_inode *inode, struct btrfs_path 
*path)
 {
int ret = 0;
int name_len;
unsigned int nlink = 0;
u32 item_size;
u32 cur_offset = 0;
-   u64 inode_objectid = btrfs_ino(BTRFS_I(inode));
+   u64 inode_objectid = btrfs_ino(inode);
u64 offset = 0;
unsigned long ptr;
struct btrfs_inode_extref *extref;
@@ -1487,7 +1487,7 @@ static noinline int fixup_inode_link_count(struct 
btrfs_trans_handle *trans,
 
nlink = ret;
 
-   ret = count_inode_extrefs(root, inode, path);
+   ret = count_inode_extrefs(root, BTRFS_I(inode), path);
if (ret < 0)
goto out;
 
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 12/24] btrfs: Make btrfs_log_trailing_hole take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/tree-log.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1301c517c2f0..9f2c42016825 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4372,7 +4372,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle 
*trans,
  */
 static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
-  struct inode *inode,
+  struct btrfs_inode *inode,
   struct btrfs_path *path)
 {
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4382,8 +4382,8 @@ static int btrfs_log_trailing_hole(struct 
btrfs_trans_handle *trans,
u64 hole_size;
struct extent_buffer *leaf;
struct btrfs_root *log = root->log_root;
-   const u64 ino = btrfs_ino(BTRFS_I(inode));
-   const u64 i_size = i_size_read(inode);
+   const u64 ino = btrfs_ino(inode);
+   const u64 i_size = i_size_read(>vfs_inode);
 
if (!btrfs_fs_incompat(fs_info, NO_HOLES))
return 0;
@@ -4925,7 +4925,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle 
*trans,
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
btrfs_release_path(path);
btrfs_release_path(dst_path);
-   err = btrfs_log_trailing_hole(trans, root, inode, path);
+   err = btrfs_log_trailing_hole(trans, root, BTRFS_I(inode), 
path);
if (err)
goto out_unlock;
}
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 18/24] btrfs: Make btrfs_unlink_inode take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/ctree.h|  2 +-
 fs/btrfs/inode.c| 58 ++---
 fs/btrfs/tree-log.c | 14 ++---
 3 files changed, 36 insertions(+), 38 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6a823719b6c5..06d5e6388b4c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3119,7 +3119,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, 
struct dentry *dentry);
 int btrfs_set_inode_index(struct inode *dir, u64 *index);
 int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
-  struct inode *dir, struct inode *inode,
+  struct btrfs_inode *dir, struct btrfs_inode *inode,
   const char *name, int name_len);
 int btrfs_add_link(struct btrfs_trans_handle *trans,
   struct inode *parent_inode, struct inode *inode,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e86b08eabb82..ac433c43d242 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3996,7 +3996,7 @@ noinline int btrfs_update_inode_fallback(struct 
btrfs_trans_handle *trans,
  */
 static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
-   struct inode *dir, struct inode *inode,
+   struct btrfs_inode *dir, struct btrfs_inode 
*inode,
const char *name, int name_len)
 {
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4006,8 +4006,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle 
*trans,
struct btrfs_dir_item *di;
struct btrfs_key key;
u64 index;
-   u64 ino = btrfs_ino(BTRFS_I(inode));
-   u64 dir_ino = btrfs_ino(BTRFS_I(dir));
+   u64 ino = btrfs_ino(inode);
+   u64 dir_ino = btrfs_ino(dir);
 
path = btrfs_alloc_path();
if (!path) {
@@ -4043,10 +4043,10 @@ static int __btrfs_unlink_inode(struct 
btrfs_trans_handle *trans,
 * that we delay to delete it, and just do this deletion when
 * we update the inode item.
 */
-   if (BTRFS_I(inode)->dir_index) {
-   ret = btrfs_delayed_delete_inode_ref(BTRFS_I(inode));
+   if (inode->dir_index) {
+   ret = btrfs_delayed_delete_inode_ref(inode);
if (!ret) {
-   index = BTRFS_I(inode)->dir_index;
+   index = inode->dir_index;
goto skip_backref;
}
}
@@ -4061,21 +4061,19 @@ static int __btrfs_unlink_inode(struct 
btrfs_trans_handle *trans,
goto err;
}
 skip_backref:
-   ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), 
index);
+   ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto err;
}
 
-   ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
-BTRFS_I(inode), dir_ino);
+   ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, 
dir_ino);
if (ret != 0 && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
goto err;
}
 
-   ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
-  BTRFS_I(dir), index);
+   ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, 
index);
if (ret == -ENOENT)
ret = 0;
else if (ret)
@@ -4085,26 +4083,26 @@ static int __btrfs_unlink_inode(struct 
btrfs_trans_handle *trans,
if (ret)
goto out;
 
-   btrfs_i_size_write(dir, dir->i_size - name_len * 2);
-   inode_inc_iversion(inode);
-   inode_inc_iversion(dir);
-   inode->i_ctime = dir->i_mtime =
-   dir->i_ctime = current_time(inode);
-   ret = btrfs_update_inode(trans, root, dir);
+   btrfs_i_size_write(>vfs_inode, dir->vfs_inode.i_size - name_len * 
2);
+   inode_inc_iversion(>vfs_inode);
+   inode_inc_iversion(>vfs_inode);
+   inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
+   dir->vfs_inode.i_ctime = current_time(>vfs_inode);
+   ret = btrfs_update_inode(trans, root, >vfs_inode);
 out:
return ret;
 }
 
 int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
-  struct inode *dir, struct inode *inode,
+  struct btrfs_inode *dir, struct btrfs_inode *inode,
   const char *name, int name_len)
 {
int ret;
ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
if (!ret) {
-   drop_nlink(inode);
-   ret = btrfs_update_inode(trans, root, 

[PATCHv2 00/24] tree-log inode vs btrfs_inode cleanups

2017-01-17 Thread Nikolay Borisov

So here is a new set of patches cleaning up tree-log functions 
w.r.t. inode vs btrfs_inode. There are still some which remain 
but I didn't find compelling arguments for cleaning them up, so 
I've left them unchanged. This time there is some size shrinkage:

   textdata bss dec hex filename
   2530598   174661   28288 2733547  29b5eb fs/btrfs/btrfs.ko - upstream 
master

text   data bss dec hex filename
2530774  174661   28288 2733723  29b69b fs/btrfs/btrfs.ko - before 
tree-log cleanup

text   data bss dec hex filename
2530163  174661   28288 2733112  29b438 fs/btrfs/btrfs.ko - both series 
applied 

So the net result of the 2 series is a reduction of 435 bytes, and I 
assume there will be further reduction in size once further cleanups 
are made.

Changes since v1: 
 * Rebased all patches to latest master

Nikolay Borisov (24):
  btrfs: Make btrfs_must_commit_transaction take btrfs_inode
  btrfs: Make btrfs_record_unlink_dir take btrfs_inode
  btrfs: Make btrfs_record_snapshot_destroy take btrfs_inode
  btrfs: Make btrfs_inode_in_log take btrfs_inode
  btrfs: Make btrfs_log_new_name take btrfs_inode
  btrfs: Make btrfs_del_dir_entries_in_log take btrfs_inode
  btrfs: Make btrfs_del_inode_ref take btrfs_inode
  btrfs: Make logged_inode_size take btrfs_inode
  btrfs: Make btrfs_check_ref_name_override take btrfs_inode
  btrfs: Make copy_items take btrfs_inode
  btrfs: Make btrfs_log_all_xattrs take btrfs_inode
  btrfs: Make btrfs_log_trailing_hole take btrfs_inode
  btrfs: Make btrfs_get_logged_extents take btrfs_inode
  btrfs: Make btrfs_log_changed_extents take btrfs_inode
  btrfs: Make log_dir_items take btrfs_inode
  btrfs: Make log_directory_changes take btrfs_inode
  btrfs: Make log_new_dir_dentries take btrfs_inode
  btrfs: Make btrfs_unlink_inode take btrfs_inode
  btrfs: Make drop_one_dir_item take btrfs_inode
  btrfs: Make __add_inode_ref take btrfs_inode
  btrfs: Make log_inode_item take btrfs_inode
  btrfs: Make btrfs_log_inode take btrfs_inode
  btrfs: Make count_inode_extrefs take btrfs_inode
  btrfs: Make count_inode_refs take btrfs_inode

 fs/btrfs/btrfs_inode.h  |  16 ++-
 fs/btrfs/ctree.h|   2 +-
 fs/btrfs/file.c |   2 +-
 fs/btrfs/inode.c|  90 ---
 fs/btrfs/ioctl.c|   2 +-
 fs/btrfs/ordered-data.c |   4 +-
 fs/btrfs/ordered-data.h |   2 +-
 fs/btrfs/tree-log.c | 288 +++-
 fs/btrfs/tree-log.h |  10 +-
 9 files changed, 201 insertions(+), 215 deletions(-)

-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2 03/24] btrfs: Make btrfs_record_snapshot_destroy take btrfs_inode

2017-01-17 Thread Nikolay Borisov
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/ioctl.c| 2 +-
 fs/btrfs/tree-log.c | 8 
 fs/btrfs/tree-log.h | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e8e1f5f5f93a..7d1b5378de49 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2497,7 +2497,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file 
*file,
trans->block_rsv = _rsv;
trans->bytes_reserved = block_rsv.size;
 
-   btrfs_record_snapshot_destroy(trans, dir);
+   btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
 
ret = btrfs_unlink_subvol(trans, root, dir,
dest->root_key.objectid,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6f9a3beb7050..581d31171683 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5794,11 +5794,11 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle 
*trans,
  * parent root and tree of tree roots trees, etc) are done.
  */
 void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
-  struct inode *dir)
+  struct btrfs_inode *dir)
 {
-   mutex_lock(_I(dir)->log_mutex);
-   BTRFS_I(dir)->last_unlink_trans = trans->transid;
-   mutex_unlock(_I(dir)->log_mutex);
+   mutex_lock(>log_mutex);
+   dir->last_unlink_trans = trans->transid;
+   mutex_unlock(>log_mutex);
 }
 
 /*
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 69702eef9603..e08ce78b2ad4 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -83,7 +83,7 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
 struct btrfs_inode *dir, struct btrfs_inode *inode,
 int for_rename);
 void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
-  struct inode *dir);
+  struct btrfs_inode *dir);
 int btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *old_dir,
struct dentry *parent);
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Unocorrectable errors with RAID1

2017-01-17 Thread Chris Murphy
On Tue, Jan 17, 2017 at 1:25 PM, Christoph Groth
 wrote:
> Goldwyn Rodrigues wrote:
>>
>> On 01/17/2017 02:44 AM, Christoph Groth wrote:
>>>
>>> Goldwyn Rodrigues wrote:
>>>
> >>>> Would you be able to upload a btrfs-image for me to examine. This is a
> >>>> core ctree error where most probably item size is incorrectly
> >>>> registered.
>>>
>>>
>>> Sure, I can do that.  I'd like to use the -s option, will this be fine?
>>
>>
>> Yes, I think that should be fine.
>
>
> Unfortunately, giving -s causes btrfs-image to segfault.  I tried both
> btrfs-progs 4.7.3 and 4.4.  I also tried different compression levels.
>
> Without -s it works, but since this file system contains the complete
> digital life of our family, I would rather not share even the file names.
>
> Any ideas on what could be done?  If you need help to debug the problem with
> btrfs-image, please tell me what I should do.  I can keep the broken file
> system around until an image can be created at some later time.

Try 4.9, or even 4.8.5, tons of bugs have been fixed since 4.7.3
although I don't know off hand if this particular bug is fixed. I did
recently do a btrfs-image with btrfs-progs v4.9 with -s and did not
get a segfault.



-- 
Chris Murphy
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"

2017-01-17 Thread Andreas Dilger
On Jan 17, 2017, at 8:59 AM, Theodore Ts'o  wrote:
> 
> On Tue, Jan 17, 2017 at 04:18:17PM +0100, Michal Hocko wrote:
>> 
>> OK, so I've been staring into the code and AFAIU current->journal_info
>> can contain my stored information. I could either hijack part of the
>> word as the ref counting is only consuming low 12b. But that looks too
>> ugly to live. Or I can allocate some placeholder.
> 
> Yeah, I was looking at something similar.  Can you guarantee that the
> context will only take one or two bits?  (Looks like it only needs one
> bit ATM, even though at the moment you're storing the whole GFP mask,
> correct?)
> 
>> But before going to play with that I am really wondering whether we need
>> all this with no journal at all. AFAIU what Jack told me it is the
>> journal lock(s) which is the biggest problem from the reclaim recursion
>> point of view. What would cause a deadlock in no journal mode?
> 
> We still have the original problem for why we need GFP_NOFS even in
> ext2.  If we are in a writeback path, and we need to allocate memory,
> we don't want to recurse back into the file system's writeback path.
> Certainly not for the same inode, and while we could make it work if
> the mm was writing back another inode, or another superblock, there
> are also stack depth considerations that would make this be a bad
> idea.  So we do need to be able to assert GFP_NOFS even in no journal
> mode, and for any file system including ext2, for that matter.
> 
> Because of the fact that we're going to have to play games with
> current->journal_info, maybe this is something that I should take
> responsibility for, and to go through the ext4 tree after the main
> patch series go through?  Maybe you could use xfs and ext2 as sample
> (simple) implementations?
> 
> My only ask is that the memalloc nofs context be a well defined N
> bits, where N < 16, and I'll find some place to put them (probably
> journal_info).

I think Dave was suggesting that the NOFS context allow a pointer to
an arbitrary struct, so that it is possible to dereference this in
the filesystem itself to determine if the recursion is safe or not.
That way, ext2 could store an inode pointer (if that is what it cares
about) and verify that writeback is not recursing on the same inode,
and XFS can store something different.  It would also need to store
some additional info (e.g. fstype or superblock pointer) so that it
can determine how to interpret the NOFS context pointer.

I think it makes sense to add a couple of void * pointers to the task
struct along with journal_info and leave it up to the filesystem to
determine how to use them.

Cheers, Andreas







signature.asc
Description: Message signed with OpenPGP using GPGMail


Re: Unocorrectable errors with RAID1

2017-01-17 Thread Christoph Groth

Goldwyn Rodrigues wrote:

On 01/17/2017 02:44 AM, Christoph Groth wrote:

Goldwyn Rodrigues wrote:

Would you be able to upload a btrfs-image for me to 
examine. This is a
core ctree error where most probably item size is incorrectly 
registered.


Sure, I can do that.  I'd like to use the -s option, will this 
be fine? 


Yes, I think that should be fine.


Unfortunately, giving -s causes btrfs-image to segfault.  I tried 
both btrfs-progs 4.7.3 and 4.4.  I also tried different 
compression levels.


Without -s it works, but since this file system contains the 
complete digital life of our family, I would rather not share even 
the file names.


Any ideas on what could be done?  If you need help to debug the 
problem with btrfs-image, please tell me what I should do.  I can 
keep the broken file system around until an image can be created 
at some later time.


signature.asc
Description: PGP signature


Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"

2017-01-17 Thread Jan Kara
On Tue 17-01-17 17:16:19, Michal Hocko wrote:
> > > But before going to play with that I am really wondering whether we need
> > > all this with no journal at all. AFAIU what Jack told me it is the
> > > journal lock(s) which is the biggest problem from the reclaim recursion
> > > point of view. What would cause a deadlock in no journal mode?
> > 
> > We still have the original problem for why we need GFP_NOFS even in
> > ext2.  If we are in a writeback path, and we need to allocate memory,
> > we don't want to recurse back into the file system's writeback path.
> 
> But we do not enter the writeback path from the direct reclaim. Or do
> you mean something other than pageout()'s mapping->a_ops->writepage?
> There is only try_to_release_page where we get back to the filesystems
> but I do not see any NOFS protection in ext4_releasepage.

Maybe to expand a bit: These days, direct reclaim can call ->releasepage()
callback, ->evict_inode() callback (and only for inodes with i_nlink > 0),
shrinkers. That's it. So the recursion possibilities are rather more limited
than they used to be several years ago and we likely do not need as much
GFP_NOFS protection as we used to.

Honza
-- 
Jan Kara 
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/24] tree-log inode vs btrfs_inode cleanups

2017-01-17 Thread David Sterba
On Thu, Jan 12, 2017 at 04:00:26PM +0200, Nikolay Borisov wrote:
> So here is a new set of patches cleaning up tree-log function 
> w.r.t inode vs btrfs_inode. There are still some which remain 
> but I didn't find compelling arguments to cleaning them up, so 
> I've left them unchanged. This time there are some size shrinkage:
> 
>text  data bss dec hex filename
>2530598 174661   28288 2733547  29b5eb fs/btrfs/btrfs.ko - upstream 
> master
> 
>   text   data bss dec hex filename
>   2530774  174661   28288 2733723  29b69b fs/btrfs/btrfs.ko - before 
> tree-log cleanup
> 
> text data bss dec hex filename
>   2530163  174661   28288 2733112  29b438 fs/btrfs/btrfs.ko - both series 
> applied 
> 
> So the net result of the 2 series is 435 bytes and I assume there 
> will be further reduction in size once further cleanups are made 

Thanks. I was about to apply the series but patch 06/24 fails to apply
on anything that I could use (master, cmason's integration or the
cleanups-next branch). Can you please refresh it on top of master? The
conflict looks like the patch tries to apply the same change twice to
btrfs_del_dir_entries_in_log, so it would be better if you check.
Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"

2017-01-17 Thread Michal Hocko
On Tue 17-01-17 10:59:16, Theodore Ts'o wrote:
> On Tue, Jan 17, 2017 at 04:18:17PM +0100, Michal Hocko wrote:
> > 
> > OK, so I've been staring into the code and AFAIU current->journal_info
> > can contain my stored information. I could either hijack part of the
> > word as the ref counting is only consuming low 12b. But that looks too
> > ugly to live. Or I can allocate some placeholder.
> 
> Yeah, I was looking at something similar.  Can you guarantee that the
> context will only take one or two bits?  (Looks like it only needs one
> bit ATM, even though at the moment you're storing the whole GFP mask,
> correct?)

No, I am just storing PF_MEMALLOC_NO{FS,IO} but I assume further changes
might want to pull in more changes into the scope context.

> > But before going to play with that I am really wondering whether we need
> > all this with no journal at all. AFAIU what Jack told me it is the
> > journal lock(s) which is the biggest problem from the reclaim recursion
> > point of view. What would cause a deadlock in no journal mode?
> 
> We still have the original problem for why we need GFP_NOFS even in
> ext2.  If we are in a writeback path, and we need to allocate memory,
> we don't want to recurse back into the file system's writeback path.

But we do not enter the writeback path from the direct reclaim. Or do
you mean something other than pageout()'s mapping->a_ops->writepage?
There is only try_to_release_page where we get back to the filesystems
but I do not see any NOFS protection in ext4_releasepage.

> Certainly not for the same inode, and while we could make it work if
> the mm was writing back another inode, or another superblock, there
> are also stack depth considerations that would make this be a bad
> idea.  So we do need to be able to assert GFP_NOFS even in no journal
> mode, and for any file system including ext2, for that matter.
> 
> Because of the fact that we're going to have to play games with
> current->journal_info, maybe this is something that I should take
> responsibility for, and to go through the ext4 tree after the main
> patch series go through?

How do you see a possibility that we would handle nojournal mode on
top of "[PATCH 5/8] jbd2: mark the transaction context with the scope
GFP_NOFS context" in a separate patch?

But anyway, I agree that we should go with the API sooner rather than
later.

>   Maybe you could use xfs and ext2 as sample
> (simple) implementations?
> 
> My only ask is that the memalloc nofs context be a well defined N
> bits, where N < 16, and I'll find some place to put them (probably
> journal_info).

I am pretty sure that we won't need more than a bit or two in a
foreseeable future (I can think of GFP_NOWAIT being one candidate).
-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [LSF/MM ATTEND] blk-mq I/O scheduling, Btrfs, VFS topics

2017-01-17 Thread Jens Axboe
On 01/13/2017 08:24 PM, Omar Sandoval wrote:
> Hi,
> 
> I'd like to attend LSF/MM again this year to discuss topics in blk-mq,
> Btrfs, and the VFS.
> 
> I've been working on the blk-mq I/O scheduling framework [1] with Jens.
> Once that is finalized, the next step is a proper multiqueue scheduler.
> I've put together a prototype with a few basic ideas [2], but there's
> still work to be done. It's hard to say where exactly we'll be in time
> for LSF/MM since things are moving pretty fast, but I'm sure there will
> be something to discuss.

I'd be interested in joining that session too, for obvious reasons.

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"

2017-01-17 Thread Theodore Ts'o
On Tue, Jan 17, 2017 at 04:18:17PM +0100, Michal Hocko wrote:
> 
> OK, so I've been staring into the code and AFAIU current->journal_info
> can contain my stored information. I could either hijack part of the
> word as the ref counting is only consuming low 12b. But that looks too
> ugly to live. Or I can allocate some placeholder.

Yeah, I was looking at something similar.  Can you guarantee that the
context will only take one or two bits?  (Looks like it only needs one
bit ATM, even though at the moment you're storing the whole GFP mask,
correct?)

> But before going to play with that I am really wondering whether we need
> all this with no journal at all. AFAIU what Jack told me it is the
> journal lock(s) which is the biggest problem from the reclaim recursion
> point of view. What would cause a deadlock in no journal mode?

We still have the original problem for why we need GFP_NOFS even in
ext2.  If we are in a writeback path, and we need to allocate memory,
we don't want to recurse back into the file system's writeback path.
Certainly not for the same inode, and while we could make it work if
the mm was writing back another inode, or another superblock, there
are also stack depth considerations that would make this be a bad
idea.  So we do need to be able to assert GFP_NOFS even in no journal
mode, and for any file system including ext2, for that matter.

Because of the fact that we're going to have to play games with
current->journal_info, maybe this is something that I should take
responsibility for, and to go through the ext4 tree after the main
patch series go through?  Maybe you could use xfs and ext2 as sample
(simple) implementations?

My only ask is that the memalloc nofs context be a well defined N
bits, where N < 16, and I'll find some place to put them (probably
journal_info).

Thanks,

- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: Corruption-framework: Include inode fields

2017-01-17 Thread David Sterba
On Thu, Jan 05, 2017 at 06:03:58PM +0100, Lakshmipathi.G wrote:
> Signed-off-by: Lakshmipathi.G 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] btrfs-progs: cmds-check.c: supports inode isize fix in lowmem

2017-01-17 Thread David Sterba
On Mon, Jan 09, 2017 at 01:38:08PM +0800, Su Yue wrote:
> Add a function 'repair_inode_isize' to support inode isize repair.

Similar comments apply to this patch: missing path init and the error
message level.

> Signed-off-by: Su Yue 
> ---
>  cmds-check.c | 49 -
>  1 file changed, 48 insertions(+), 1 deletion(-)
> 
> diff --git a/cmds-check.c b/cmds-check.c
> index 567ca80..088c0d8 100644
> --- a/cmds-check.c
> +++ b/cmds-check.c
> @@ -2457,6 +2457,45 @@ out:
>  }
>  
>  /*
> + * Set inode's isize to correct value in @info

Please explain in more detail why the new value is the correct one.

> + *
> + * Returns <0  means on error
> + * Returns  0  means successful repair
> + */
> +static int repair_inode_isize_lowmem(struct btrfs_trans_handle *trans,
> +   struct btrfs_root *root,
> +   struct inode_item_fix_info *info)
> +{
> + struct btrfs_inode_item *ei;

'ei' looks like a copy-paste from some code that uses an extent item; if
the variable name is meant to be a mnemonic, please use 'ii'.

> + struct btrfs_key key;
> + struct btrfs_path path;
> + int ret;
> +
> + ASSERT(info);
> + key.objectid = info->ino;
> + key.type = BTRFS_INODE_ITEM_KEY;
> + key.offset = 0;
> +
> + ret = btrfs_search_slot(trans, root, , , 0, 1);
> + if (ret < 0)
> + goto out;
> + if (ret > 0) {
> + ret = -ENOENT;
> + goto out;
> + }
> +
> + ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
> + struct btrfs_inode_item);
> + btrfs_set_inode_size(path.nodes[0], ei, info->isize);
> + btrfs_mark_buffer_dirty(path.nodes[0]);
> + printf("reset isize for inode %llu root %llu\n", info->ino,
> +root->root_key.objectid);
> +out:
> + btrfs_release_path();
> + return ret;
> +}
> +
> +/*
>   * repair_inode_item - repair inode item errors
>   *
>   * Repair the inode item if error can be repaired. Any caller should compare
> @@ -2484,7 +2523,7 @@ static int repair_inode_item(struct btrfs_root *root,
>   ret = 0;
>   goto out;
>   }
> - if (!(err & NBYTES_ERROR)) {
> + if (!(err & NBYTES_ERROR) && !(err & ISIZE_ERROR)) {
>   warning("root %llu INODE[%llu] have error(s) can't repair, 
> error : %d",
>   root->objectid, info->ino, err);
>   /* can't fix any errors, ret should be positive */
> @@ -2505,6 +2544,13 @@ static int repair_inode_item(struct btrfs_root *root,
>   else if (ret < 0)
>   goto out;
>   }
> + if (err & ISIZE_ERROR) {
> + ret = repair_inode_isize_lowmem(trans, root, info);
> + if (ret == 0)
> + err &= ~ISIZE_ERROR;
> + else if (ret < 0)
> + goto out;
> + }
>  
>   if (err != info->err) {
>   info->err = err;
> @@ -5039,6 +5085,7 @@ out:
>  
>   if (isize != size) {
>   err |= ISIZE_ERROR;
> + info->isize = size;
>   error("root %llu DIR INODE [%llu] size(%llu) not equal 
> to %llu",
> root->objectid, inode_id, isize, size);
>   }
> -- 
> 2.11.0
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Lsf-pc] [LSF/MM TOPIC] sharing pages between mappings

2017-01-17 Thread Jan Kara
On Wed 11-01-17 15:13:19, Miklos Szeredi wrote:
> On Wed, Jan 11, 2017 at 12:51 PM, Jan Kara  wrote:
> > On Wed 11-01-17 11:29:28, Miklos Szeredi wrote:
> >> I know there's work on this for xfs, but could this be done in generic mm
> >> code?
> >>
> >> What are the obstacles?  page->mapping and page->index are the obvious
> >> ones.
> >
> > Yes, these two are the main that come to my mind. Also you'd need to
> > somehow share the mapping->i_mmap tree so that unmap_mapping_range() works.
> >
> >> If that's too difficult is it maybe enough to share mappings between
> >> files while they are completely identical and clone the mapping when
> >> necessary?
> >
> > Well, but how would the page->mapping->host indirection work? Even if you
> > have identical contents of the mappings, you still need to be aware there
> > are several inodes behind them and you need to pick the right one
> > somehow...
> 
> When do we actually need page->mapping->host?  The only place where
> it's not available is page writeback.  Then we can know that the
> original page was already cow-ed and after being cowed, the page
> belongs only to a single inode.

Yeah, in principle the information may exist; however, propagating it to all
appropriate places may be a mess.

> What then happens if the newly written data is cloned before being
> written back?   We can either write back the page during the clone, so
> that only clean pages are ever shared.  Or we can let dirty pages be
> shared between inodes.

The former is what I'd suggest for sanity... I.e. share only read-only
pages.

> In that latter case the question is: do we
> care about which inode we use for writing back the data?  Is the inode
> needed at all?  I don't know enough about filesystem internals to see
> clearly what happens in such a situation.
> 
> >> All COW filesystems would benefit, as well as layered ones: lots of
> >> fuse fs, and in some cases overlayfs too.
> >>
> >> Related:  what can DAX do in the presence of cloned block?
> >
> > For DAX handling a block COW should be doable if that is what you are
> > asking about. Handling of blocks that can be written to while they are
> > shared will be rather difficult (you have problems with keeping dirty bits
> > in the radix tree consistent if nothing else).
> 
> What happens if you do:
> 
> - clone_file_range(A, off1, B, off2, len);
> 
> - mmap both A and B using DAX.
> 
> The mapping will contain the same struct page for two different mappings, no?

Not the same struct page, as DAX does not have pages with struct page.
However the same pfn will be underlying off1 of A and off2 of B. And for
reads this is just fine. Once you want to write, you have to make sure you
COW before you start modifying the data or you'll get data corruption (we
synchronize operations using the exceptional entries in mapping->page_tree
in DAX and these are separate for A and B).

Honza
-- 
Jan Kara 
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] btrfs-progs: cmds-check.c: supports inode nbytes fix in lowmem

2017-01-17 Thread David Sterba
Hi,

I have some comments, see below.

On Mon, Jan 09, 2017 at 01:38:07PM +0800, Su Yue wrote:
> Added 'repair_inode_item' which dispatches functions such as
> 'repair_inode_nbytes_lowmem' to correct errors and
> 'struct inode_item_fix_info' to store correct values and errors.
> 
> Signed-off-by: Su Yue 
> ---
>  cmds-check.c | 161 
> +++
>  1 file changed, 152 insertions(+), 9 deletions(-)
> 
> diff --git a/cmds-check.c b/cmds-check.c
> index 1dba298..567ca80 100644
> --- a/cmds-check.c
> +++ b/cmds-check.c
> @@ -371,6 +371,17 @@ struct root_item_info {
>  };
>  
>  /*
> + * Use inode_item_fix_info as function check_inode_item's arg.
> + */
> +struct inode_item_fix_info {
> + u64 ino;
> + u64 isize;
> + u64 nbytes;
> +
> + int err;
> +};
> +
> +/*
>   * Error bit for low memory mode check.
>   *
>   * Currently no caller cares about it yet.  Just internal use for error
> @@ -1866,13 +1877,16 @@ struct node_refs {
>  static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
>struct node_refs *nrefs, u64 level);
>  static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
> - unsigned int ext_ref);
> -
> + unsigned int ext_ref,
> + struct inode_item_fix_info *info);
> +static int repair_inode_item(struct btrfs_root *root,
> +  struct inode_item_fix_info *info);
>  static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path 
> *path,
>  struct node_refs *nrefs, int *level, int ext_ref)
>  {
>   struct extent_buffer *cur = path->nodes[0];
>   struct btrfs_key key;
> + struct inode_item_fix_info info;
>   u64 cur_bytenr;
>   u32 nritems;
>   u64 first_ino = 0;
> @@ -1881,6 +1895,7 @@ static int process_one_leaf_v2(struct btrfs_root *root, 
> struct btrfs_path *path,
>   int ret = 0; /* Final return value */
>   int err = 0; /* Positive error bitmap */
>  
> + memset(&info, 0, sizeof(info));
>   cur_bytenr = cur->start;
>  
>   /* skip to first inode item or the first inode number change */
> @@ -1900,8 +1915,26 @@ static int process_one_leaf_v2(struct btrfs_root 
> *root, struct btrfs_path *path,
>   path->slots[0] = i;
>  
>  again:
> - err |= check_inode_item(root, path, ext_ref);
> + err |= check_inode_item(root, path, ext_ref, &info);
> +
> + if (repair && (err & ~LAST_ITEM)) {
> + ret = repair_inode_item(root, &info);
>  
> + if (ret < 0)
> + goto out;
> + /*
> +  * if some errors was repaired, path shall be searched
> +  * again since path has been changed
> +  */
> + if (ret == 0) {
> + btrfs_item_key_to_cpu(path->nodes[0], &key,
> +   path->slots[0]);
> + btrfs_release_path(path);
> + btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +
> + cur = path->nodes[0];
> + }
> + }
>   if (err & LAST_ITEM)
>   goto out;
>  
> @@ -2211,7 +2244,8 @@ out:
>  }
>  
>  static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
> - unsigned int ext_ref);
> + unsigned int ext_ref,
> + struct inode_item_fix_info *info);
>  
>  static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path 
> *path,
>int *level, struct node_refs *nrefs, int ext_ref)
> @@ -2293,7 +2327,7 @@ static int walk_down_tree_v2(struct btrfs_root *root, 
> struct btrfs_path *path,
>   }
>  
>   ret = check_child_node(root, cur, path->slots[*level], next);
> - if (ret < 0) 
> + if (ret < 0)
>   break;
>  
>   if (btrfs_is_leaf(next))
> @@ -2383,6 +2417,105 @@ out:
>   return ret;
>  }
>  
> +/*
> + * Set inode's nbytes to correct value in @info
> + *
> + * Returns <0  means on error
> + * Returns  0  means successful repair
> + */
> +static int repair_inode_nbytes_lowmem(struct btrfs_trans_handle *trans,
> +   struct btrfs_root *root,
> +   struct inode_item_fix_info *info)
> +{
> + struct btrfs_inode_item *ei;
> + struct btrfs_key key;
> + struct btrfs_path path;
> + int ret;
> +
> + ASSERT(info);
> + key.objectid = info->ino;
> + key.type = BTRFS_INODE_ITEM_KEY;
> + key.offset = 0;

The path init call is missing here.

> +
> + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
> + if (ret < 0)
> + goto out;
> + if (ret > 0) {
> + ret = -ENOENT;
> + goto out;
> + }
> +
> + ei = btrfs_item_ptr(path.nodes[0], 

Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"

2017-01-17 Thread Michal Hocko
On Tue 17-01-17 09:24:25, Michal Hocko wrote:
> On Mon 16-01-17 21:56:07, Theodore Ts'o wrote:
> > On Fri, Jan 06, 2017 at 03:11:07PM +0100, Michal Hocko wrote:
> > > From: Michal Hocko 
> > > 
> > > This reverts commit 216553c4b7f3e3e2beb4981cddca9b2027523928. Now that
> > > the transaction context uses memalloc_nofs_save and all allocations
> > > within this context inherit GFP_NOFS automatically, there is no
> > > reason to mark specific allocations explicitly.
> > > 
> > > This patch should not introduce any functional change. The main point
> > > of this change is to reduce explicit GFP_NOFS usage inside ext4 code
> > > to make the review of the remaining usage easier.
> > > 
> > > Signed-off-by: Michal Hocko 
> > > Reviewed-by: Jan Kara 
> > 
> > Changes in the jbd2 layer aren't going to guarantee that
> > memalloc_nofs_save() will be executed if we are running ext4 without a
> > journal (aka in no journal mode).  And this is a *very* common
> > configuration; it's how ext4 is used inside Google in our production
> > servers.
> 
> OK, I wasn't aware of that.
> 
> > So that means the earlier patches will probably need to be changed so
> > the NOFS scope is done in the ext4_journal_{start,stop} functions in
> > fs/ext4/ext4_jbd2.c.
> 
> I would definitely appreciate some help here. The call paths are rather
> complex and I am not familiar enough with the code. One of the biggest
> problems I have currently is that there doesn't seem to be an easy place
> to store the old allocation context. 

OK, so I've been staring into the code and AFAIU current->journal_info
can contain my stored information. I could either hijack part of the
word, as the ref counting only consumes the low 12 bits, but that looks
too ugly to live, or I can allocate some placeholder.

But before going to play with that I am really wondering whether we need
all this with no journal at all. AFAIU what Jack told me it is the
journal lock(s) which is the biggest problem from the reclaim recursion
point of view. What would cause a deadlock in no journal mode?

-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btfs-progs: fsck-tests: corrupt nlink value test

2017-01-17 Thread David Sterba
On Mon, Jan 16, 2017 at 09:35:52AM -0700, lakshmipath...@giis.co.in wrote:
> If btrfs-corrupt-block is in bad shape, then corruption scripts around 
> them won't help in long term.
> 
> Yes, documentation for btrfs-corrupt-block needs improvement.  imo, 
> re-arranged priority will be like : (5), (1)/(3) then (4).  Agree that 
> some corner cases, having static image is best option, i think the 
> corruption test-case needs to be mixture of both static-images and 
> scripts.

Both approaches have their pros and cons so I'll accept both. The
functionality provided by the corrupt block utility can be used, any
changes to the command line UI will be also applied to the test scripts.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: fsck-tests: missing csum test script

2017-01-17 Thread David Sterba
It's not clear from the test what's the purpose. There's one corrupted
csum but the whole csum tree rebuild option is used. This is a pretty
basic check that the --init-csum-tree works, so it should be mentioned
somewhere in the test script.

On Thu, Jan 05, 2017 at 08:26:36PM +0100, Lakshmipathi.G wrote:
> Signed-off-by: Lakshmipathi.G 
> ---
>  tests/fsck-tests/027-missing-data-csum/test.sh | 39 
> ++
>  1 file changed, 39 insertions(+)
>  create mode 100755 tests/fsck-tests/027-missing-data-csum/test.sh
> 
> diff --git a/tests/fsck-tests/027-missing-data-csum/test.sh 
> b/tests/fsck-tests/027-missing-data-csum/test.sh
> new file mode 100755
> index 000..6d1dc97
> --- /dev/null
> +++ b/tests/fsck-tests/027-missing-data-csum/test.sh
> @@ -0,0 +1,39 @@
> +#!/bin/bash
> +
> +source $TOP/tests/common
> +
> +check_prereq btrfs-corrupt-block
> +check_prereq mkfs.btrfs
> +check_prereq btrfs
> +
> +setup_root_helper
> +prepare_test_dev 512M
> +
> +
> +# simulate missing csum error and repair using init-csum option
> +test_csum_corruption()
> +{
> + run_check $SUDO_HELPER $TOP/mkfs.btrfs -f $TEST_DEV
> +
> + run_check_mount_test_dev
> +
> + export DATASET_SIZE=1
> + generate_dataset small
> +
> + run_check_umount_test_dev
> +
> + # find bytenr
> + bytenr=`$SUDO_HELPER $TOP/btrfs-debug-tree $TEST_DEV | \

Please don't use btrfs-debug-tree, it's been obsoleted by the dump-tree
subcommand. And you can use the '-t csum' option to dump just the csum
tree.

> + grep "EXTENT_CSUM EXTENT_CSUM" | \
> + cut -f1 -d')' | awk '{print $6}'`

This can be simplified to one sed command.

> +
> + # corrupt csum bytenr
> + run_check $SUDO_HELPER $TOP/btrfs-corrupt-block -C $bytenr $TEST_DEV
> +
> + $SUDO_HELPER $TOP/btrfs check $TEST_DEV >& /dev/null && \
> + _fail "btrfs check failed to detect missing csum."

Here we want to capture the output from 'check' for analysis in case of
problems.

> + run_check $SUDO_HELPER $TOP/btrfs check --repair --init-csum $TEST_DEV
> + run_check $SUDO_HELPER $TOP/btrfs check  $TEST_DEV
> +}
> +
> +test_csum_corruption
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: Corruption-framework: Include inode nlink field

2017-01-17 Thread David Sterba
On Thu, Jan 05, 2017 at 11:08:32AM +0100, Lakshmipathi.G wrote:
> Patch with fix for David Sterba review comment.
> 
> Signed-off-by: Lakshmipathi.G 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Unocorrectable errors with RAID1

2017-01-17 Thread Austin S. Hemmelgarn

On 2017-01-17 04:18, Christoph Groth wrote:

Austin S. Hemmelgarn wrote:


There's not really much in the way of great documentation that I know
of.  I can however cover the basics here:

(...)


Thanks for this explanation.  I'm sure it will be also useful to others.

Glad I could help.



If the chunk to be allocated was a data chunk, you get -ENOSPC
(usually, sometimes you might get other odd results) in the userspace
application that triggered the allocation.


It seems that the available space reported by the system df command
corresponds roughly to the size of the block device minus all the "used"
space as reported by "btrfs fi df".

That's correct.


If I understand what you wrote correctly this means that when writing a
huge file it may happen that the system df will report enough free
space, but btrfs will raise ENOSPC.  However, it should be possible to
keep writing small files even at this point (assuming that there's
enough space for the metadata).  Or will btrfs split the huge file into
small pieces to fit it into the fragmented free space in the chunks?
OK, so the first bit to understanding this is that an extent in a file 
can't be larger than a chunk.  This means that if you have space for 3 
1GB data chunks located in 3 different places on the storage device, you 
can still write a 3GB file to the filesystem, it will just end up with 3 
1GB extents.  The issues with ENOSPC come in when almost all of your 
space is allocated to chunks and one type gets full.  In such a 
situation, if you have metadata space, you can keep writing to the FS, 
but big writes may fail, and you'll eventually end up in a situation 
where you need to delete things to free up space.


Such a situation should be avoided of course.  I'm asking out of curiosity.


* So scrubbing is not enough to check the health of a btrfs file
system?  It’s also necessary to read all the files?



Scrubbing checks data integrity, but not the state of the data. IOW,
you're checking that the data and metadata match with the checksums,
but not necessarily that the filesystem itself is valid.


I see, but what should one then do to detect problems such as mine as
soon as possible?  Periodically calculate hashes for all files? I’ve
never seen a recommendation to do that for btrfs.



Scrub will verify that the data is the same as when the kernel
calculated the block checksum.  That's really the best that can be
done. In your case, it couldn't correct the errors because both copies
of the corrupted blocks were bad (this points at an issue with either
RAM or the storage controller BTW, not the disks themselves).  Had one
of the copies been valid, it would have intelligently detected which
one was bad and fixed things.


I think I understand the problem with the three corrupted blocks that I
was able to fix by replacing the files.

But there is also the strange "Stale file handle" error with some other
files that was not found by scrubbing, and also does not seem to appear
in the output of "btrfs dev stats", which is BTW

[/dev/sda2].write_io_errs   0
[/dev/sda2].read_io_errs    0
[/dev/sda2].flush_io_errs   0
[/dev/sda2].corruption_errs 3
[/dev/sda2].generation_errs 0
[/dev/sdb2].write_io_errs   0
[/dev/sdb2].read_io_errs    0
[/dev/sdb2].flush_io_errs   0
[/dev/sdb2].corruption_errs 3
[/dev/sdb2].generation_errs 0

(The 2 times 3 corruption errors seem to be the uncorrectable errors
that I could fix by replacing the files.)
Yep, those correspond directly to the uncorrectable errors you mentioned 
in your original post.


To get the "stale file handle" error I need to try to read the affected
file.  That's why I was wondering whether reading all the files
periodically is indeed a useful maintenance procedure with btrfs.
In the cases I've seen, no it isn't all that useful.  As far as the 
whole ESTALE thing, that's almost certainly a bug and you either 
shouldn't be getting an error there, or you shouldn't be getting that 
error code there.


"btrfs check" does find the problem, but it can be only run on an
unmounted file system.


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Unocorrectable errors with RAID1

2017-01-17 Thread Austin S. Hemmelgarn

On 2017-01-16 23:50, Janos Toth F. wrote:

BTRFS uses a 2 level allocation system.  At the higher level, you have
chunks.  These are just big blocks of space on the disk that get used for
only one type of lower level allocation (Data, Metadata, or System).  Data
chunks are normally 1GB, Metadata 256MB, and System depends on the size of
the FS when it was created.  Within these chunks, BTRFS then allocates
individual blocks just like any other filesystem.


This always seems to confuse me when I try to get an abstract idea
about de-/fragmentation of Btrfs.
Can meta-/data be fragmented on both levels? And if so, can defrag
and/or balance "cure" both levels of fragmentation (if any)?
But how? Maybe several defrag and balance runs, repeated until
returns diminish (or at least you consider them meaningless and/or
unnecessary)?
Defrag operates only at the block level.  It won't allocate chunks 
unless it has to, and it won't remove chunks unless they become empty 
from it moving things around (although that's not likely to happen most 
of the time).  Balance functionally operates at both levels, but it 
doesn't really do any defragmentation.  Balance _may_ merge extents 
sometimes, but I'm not sure of this.  It will compact allocations and 
therefore functionally defragment free space within chunks (though not 
necessarily at the chunk-level itself).


Defrag run with the same options _should_ have no net effect after the 
first run, the two exceptions being if the filesystem is close to full 
or if the data set is being modified live while the defrag is happening. 
 Balance run with the same options will eventually hit a point where it 
doesn't do anything (or only touches one chunk of each type but doesn't 
actually give any benefit).  If you're just using the usage filters or 
doing a full balance, this point is the second run.  If you're using 
other filters, it's functionally not possible to determine when that 
point will be without low-level knowledge of the chunk layout.


For an idle filesystem, if you run defrag then a full balance, that will 
get you a near optimal layout.  Running them in the reverse order will 
get you a different layout that may be less optimal than running defrag 
first because defrag may move data in such a way that new chunks get 
allocated.  Repeated runs of defrag and balance will in more than 95% of 
cases provide no extra benefit.




What balancing does is send everything back through the allocator, which in
turn back-fills chunks that are only partially full, and removes ones that
are now empty.


Doesn't this have a potential chance of introducing (additional)
extent-level fragmentation?
In theory, yes.  IIRC, extents can't cross a chunk boundary.  Beyond 
that packing constraint, balance shouldn't fragment things further.



FWIW, while there isn't a daemon yet that does this, it's a perfect thing
for a cronjob.  The general maintenance regimen that I use for most of my
filesystems is:
* Run 'btrfs balance start -dusage=20 -musage=20' daily.  This will complete
really fast on most filesystems, and keeps the slack-space relatively
under-control (and has the nice bonus that it helps defragment free space).
* Run a full scrub on all filesystems weekly.  This catches silent
corruption of the data, and will fix it if possible.
* Run a full defrag on all filesystems monthly.  This should be run before
the balance (reasons are complicated and require more explanation than you
probably care for).  I would run this at least weekly though on HDD's, as
they tend to be more negatively impacted by fragmentation.


I wonder if one should always run a full balance instead of a full
scrub, since balance should also read (and thus theoretically verify)
the meta-/data (does it though? I would expect it to check the
checksums, but who knows...? maybe it's "optimized" to skip that
step?) and also perform the "consolidation" of the chunk level.
Scrub uses fewer resources than balance.  Balance has to read _and_ 
re-write all data in the FS regardless of the state of the data.  Scrub 
only needs to read the data if it's good, and if it's bad it only (for 
raid1) has to re-write the replica that's bad, not both of them.  In 
fact, the only practical reason to run balance on a regular basis at all 
is to compact allocations and defragment free space.  This is why I only 
have it balance chunks that are less than 1/5 full.


I wish there was some more "integrated" solution for this: a
balance-like operation which consolidates the chunks and also
de-fragments the file extents at the same time while passively
uncovers (and fixes if necessary and possible) any checksum mismatches
/ data errors, so that balance and defrag can't work against
each-other and the overall work is minimized (compared to several full
runs or many different commands).
More than 90% of the time, the performance difference between the 
absolute optimal layout and the one generated by just running defrag 
then balancing is 

Re: Unocorrectable errors with RAID1

2017-01-17 Thread Goldwyn Rodrigues


On 01/17/2017 02:44 AM, Christoph Groth wrote:
> Goldwyn Rodrigues wrote:
> 
>> Would you be able to upload a btrfs-image for me to examine. This is a
>> core ctree error where most probably item size is incorrectly registered.
> 
> Sure, I can do that.  I'd like to use the -s option, will this be fine? 

Yes, I think that should be fine.

> Is there some preferred place for the upload?  If not, I can use
> personal webspace.

No, there is no preferred place. As far as I can download it, it is fine.

-- 
Goldwyn



signature.asc
Description: OpenPGP digital signature


Re: corruption: yet another one after deleting a ro snapshot

2017-01-17 Thread Christoph Anton Mitterer
Am 17. Januar 2017 09:53:19 MEZ schrieb Qu Wenruo :
>Just lowmem false alert, as extent-tree dump shows complete fine
>result.
>
>I'll CC you and adds your reported-by tag when there is any update on 
>this case.

Fine, just one more thing from my side on this issue:
Do you want me to leave the fs untouched until I could verify a lowmem mode fix?
Or is it ok to go on using it (and running backups on it)? 

Cheers,
Chris.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Unocorrectable errors with RAID1

2017-01-17 Thread Christoph Groth

Austin S. Hemmelgarn wrote:

There's not really much in the way of great documentation that I 
know of.  I can however cover the basics here:


(...)


Thanks for this explanation.  I'm sure it will be also useful to 
others.


If the chunk to be allocated was a data chunk, you get -ENOSPC 
(usually, sometimes you might get other odd results) in the 
userspace application that triggered the allocation.


It seems that the available space reported by the system df 
command corresponds roughly to the size of the block device minus 
all the "used" space as reported by "btrfs fi df".


If I understand what you wrote correctly this means that when 
writing a huge file it may happen that the system df will report 
enough free space, but btrfs will raise ENOSPC.  However, it 
should be possible to keep writing small files even at this point 
(assuming that there's enough space for the metadata).  Or will 
btrfs split the huge file into small pieces to fit it into the 
fragmented free space in the chunks?


Such a situation should be avoided of course.  I'm asking out of 
curiosity.


* So scrubbing is not enough to check the health of a btrfs 
file system?  It’s also necessary to read all the files?


Scrubbing checks data integrity, but not the state of the 
data. IOW, you're checking that the data and metadata match 
with the checksums, but not necessarily that the filesystem 
itself is valid.


I see, but what should one then do to detect problems such as 
mine as soon as possible?  Periodically calculate hashes for 
all files? I’ve never seen a recommendation to do that for 
btrfs.


Scrub will verify that the data is the same as when the kernel 
calculated the block checksum.  That's really the best that can 
be done. In your case, it couldn't correct the errors because 
both copies of the corrupted blocks were bad (this points at an 
issue with either RAM or the storage controller BTW, not the 
disks themselves).  Had one of the copies been valid, it would 
have intelligently detected which one was bad and fixed things.


I think I understand the problem with the three corrupted blocks 
that I was able to fix by replacing the files.


But there is also the strange "Stale file handle" error with some 
other files that was not found by scrubbing, and also does not 
seem to appear in the output of "btrfs dev stats", which is BTW


[/dev/sda2].write_io_errs   0
[/dev/sda2].read_io_errs    0
[/dev/sda2].flush_io_errs   0
[/dev/sda2].corruption_errs 3
[/dev/sda2].generation_errs 0
[/dev/sdb2].write_io_errs   0
[/dev/sdb2].read_io_errs    0
[/dev/sdb2].flush_io_errs   0
[/dev/sdb2].corruption_errs 3
[/dev/sdb2].generation_errs 0

(The 2 times 3 corruption errors seem to be the uncorrectable 
errors that I could fix by replacing the files.)


To get the "stale file handle" error I need to try to read the 
affected file.  That's why I was wondering whether reading all the 
files periodically is indeed a useful maintenance procedure with 
btrfs.


"btrfs check" does find the problem, but it can be only run on an 
unmounted file system.


signature.asc
Description: PGP signature


Re: corruption: yet another one after deleting a ro snapshot

2017-01-17 Thread Qu Wenruo



At 01/17/2017 06:07 AM, Christoph Anton Mitterer wrote:

On Mon, 2017-01-16 at 13:47 +0800, Qu Wenruo wrote:

And I highly suspect if the subvolume 6403 is the RO snapshot you
just removed.


I guess there is no way to find out whether it was that snapshot,
is
there?


"btrfs subvolume list" could do it.

Well that was clear,... but I rather meant something that also shows me
the path of the deleted subvol.


Deleted subvol lost its ROOT_BACKREF, so there is no info where that 
subvolume used to be.



Anyway:
# btrfs subvolume list /data/data-a/3/
ID 6029 gen 2528 top level 5 path data
ID 6031 gen 3208 top level 5 path backups
ID 7285 gen 3409 top level 5 path 
snapshots/_external-fs/data-a1/data/2017-01-11_1

So since I only had two further snapshots in snapshots/_external-
fs/data-a1/data/ it must be the deleted one.

btw: data is empty, and backup contains actually some files (~25k,
~360GB)... these are not created via send/receive, but either via cp or
rsync.
And they are always in the same subvol (i.e. the backups svol isn't
deleted like the snapshots are)



Also checked the extent tree, the result is a little interesting:
1) Most tree backref are good.
In fact, 3 of all the 4 errors reported are tree blocks shared by
other subvolumes, like:

item 77 key (5120 METADATA_ITEM 1) itemoff 13070 itemsize 42
extent refs 2 gen 11 flags TREE_BLOCK|FULL_BACKREF
tree block skinny level 1
tree block backref root 7285
tree block backref root 6572

This means the tree blocks are shared by 2 other subvolumes,
7285 and 6572.

7285 subvolume is completely OK, while 6572 is also undergoing
subvolume
deletion(while real deletion doesn't start yet).

Well there were in total three snapshots, the still existing:
snapshots/_external-fs/data-a1/data/2017-01-11_1
and two earlier ones,
one from around 2016-09-16_1 (= probably ID 6572?), one even a bit
earlier from 2016-08-19_1 (probably ID 6403?).
Each one was created with
send -p | receive, using the respectively earlier one as parent.
So it's
quite reasonable that they share the extents and also that it's by 2
subvols.




And considering the generation, I assume 6403 is deleted before 6572.

Don't remember which one of the 2 subvols from 2016 I've deleted first,
the older or the more recent one... my bash history implies in this
order:
 4203  btrfs subvolume delete 2016-08-19_1
 4204  btrfs subvolume delete 2016-09-16_1



So we're almost clear that, btrfs (maybe only btrfsck) doesn't handle
it
well if there are multiple subvolumes undergoing deletion.

This gives us enough info to try to build such image by ourselves
now.
(Although still quite hard to do though).

Well keep me informed if you actually find/fix something  :)



And for the scary lowmem mode, it's false alert.

I manually checked the used size of a block group and it's OK.

So you're going to fix this?


Yes, digging now.
The lowmem mode bug should be much easier to fix, compared to the lost 
backref false alert.






BTW, most of your block groups are completely used, without any
space.
But interestingly, mostly data extent size are just 512K, larger than
compressed extent upper limit, but still quite small.

Not sure if I understand this...



In other words, your fs seems to be fragmented considering the upper
limit of a data extent is 128M.
(Or your case is quite common in common world?)

No, I don't think I understand what you mean :D



So you are mostly OK to mount it rw any time you want, and I have
already downloaded the raw data.

Okay, I've remounted it now RW, called btrfs filesystem sync, and
waited until the HDD became silent and showed no further activity.

(again 3.9)

# btrfs check /dev/nbd0 ; echo $?
Checking filesystem on /dev/nbd0
UUID: 326d292d-f97b-43ca-b1e8-c722d3474719
checking extents
checking free space cache
checking fs roots
checking csums
checking root refs
found 7469206884352 bytes used err is 0
total csum bytes: 7281779252
total tree bytes: 10837262336
total fs tree bytes: 2011906048
total extent tree bytes: 1015349248
btree space waste bytes: 922444044
file data blocks allocated: 7458369622016
 referenced 7579485159424
0


Nice to see it.




=> as you can see, original mode pretends things would be fine now.


# btrfs check --mode=lowmem /dev/nbd0 ; echo $?
Checking filesystem on /dev/nbd0
UUID: 326d292d-f97b-43ca-b1e8-c722d3474719
checking extents
ERROR: block group[74117545984 1073741824] used 1073741824 but extent items 
used 0
ERROR: block group[239473786880 1073741824] used 1073741824 but extent items 
used 1207959552
ERROR: block group[500393050112 1073741824] used 1073741824 but extent items 
used 1207959552
ERROR: block group[581997428736 1073741824] used 1073741824 but extent items 
used 0
ERROR: block group[626557714432 1073741824] used 1073741824 but extent items 
used 0
ERROR: block group[668433645568 1073741824] used 1073741824 but extent items 
used 0
ERROR: block 

Re: Uncorrectable errors with RAID1

2017-01-17 Thread Christoph Groth

Goldwyn Rodrigues wrote:

Would you be able to upload a btrfs-image for me to 
examine. This is a core ctree error where most probably item 
size is incorrectly registered.


Sure, I can do that.  I'd like to use the -s option, will this be 
fine?  Is there some preferred place for the upload?  If not, I 
can use personal webspace.


signature.asc
Description: PGP signature


Re: [PATCH 8/8] Revert "ext4: fix wrong gfp type under transaction"

2017-01-17 Thread Michal Hocko
On Mon 16-01-17 21:56:07, Theodore Ts'o wrote:
> On Fri, Jan 06, 2017 at 03:11:07PM +0100, Michal Hocko wrote:
> > From: Michal Hocko 
> > 
> > This reverts commit 216553c4b7f3e3e2beb4981cddca9b2027523928. Now that
> > the transaction context uses memalloc_nofs_save and all allocations
> > within this context inherit GFP_NOFS automatically, there is no
> > reason to mark specific allocations explicitly.
> > 
> > This patch should not introduce any functional change. The main point
> > of this change is to reduce explicit GFP_NOFS usage inside ext4 code
> > to make the review of the remaining usage easier.
> > 
> > Signed-off-by: Michal Hocko 
> > Reviewed-by: Jan Kara 
> 
> Changes in the jbd2 layer aren't going to guarantee that
> memalloc_nofs_save() will be executed if we are running ext4 without a
> journal (aka in no journal mode).  And this is a *very* common
> configuration; it's how ext4 is used inside Google in our production
> servers.

OK, I wasn't aware of that.

> So that means the earlier patches will probably need to be changed so
> the NOFS scope is done in the ext4_journal_{start,stop} functions in
> fs/ext4/ext4_jbd2.c.

I would definitely appreciate some help here. The call paths are rather
complex and I am not familiar enough with the code. One of the biggest
problems I currently have is that there doesn't seem to be an easy place
to store the old allocation context. The original patch had it inside
the journal handle. I was thinking about putting it into the superblock but
ext4_journal_stop doesn't seem to have access to the sb if there is no
handle. Now, if ext4_journal_start is never called from a nested context
then this is not a big deal, but there are just too many callers to
check...
-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html