[PATCH v2 4/4] btrfs-progs: volumes: Use new raid5_gen_result to calculate raid5 parity

2016-09-29 Thread Qu Wenruo
Use thew raid5_gen_result() function to calculate raid5 parity.

Signed-off-by: Qu Wenruo 
---
v2:
  Patch split
---
 volumes.c | 19 ++-
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/volumes.c b/volumes.c
index 93ce934..e78d157 100644
--- a/volumes.c
+++ b/volumes.c
@@ -2112,7 +2112,6 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
 {
struct extent_buffer **ebs, *p_eb = NULL, *q_eb = NULL;
int i;
-   int j;
int ret;
int alloc_size = eb->len;
void **pointers;
@@ -2167,18 +2166,12 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers);
} else {
ebs[multi->num_stripes - 1] = p_eb;
-   memcpy(p_eb->data, ebs[0]->data, stripe_len);
-   for (j = 1; j < multi->num_stripes - 1; j++) {
-   for (i = 0; i < stripe_len; i += sizeof(u64)) {
-   u64 p_eb_data;
-   u64 ebs_data;
-
-   p_eb_data = get_unaligned_64(p_eb->data + i);
-   ebs_data = get_unaligned_64(ebs[j]->data + i);
-   p_eb_data ^= ebs_data;
-   put_unaligned_64(p_eb_data, p_eb->data + i);
-   }
-   }
+   for (i = 0; i < multi->num_stripes; i++)
+   pointers[i] = ebs[i]->data;
+   ret = raid5_gen_result(multi->num_stripes, stripe_len,
+  multi->num_stripes - 1, pointers);
+   if (ret < 0)
+   goto out_free_split;
}
 
for (i = 0; i < multi->num_stripes; i++) {
-- 
2.10.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/4] btrfs-progs: raid56: Introduce new function to calculate raid5 parity or data stripe

2016-09-29 Thread Qu Wenruo
Introduce new function raid5_gen_result() to calculate parity or data
stripe.

Signed-off-by: Qu Wenruo 
---
v2:
  Patch split
  Change parameter type from void* to char* for xor_range()
  Change parameter order for xxor_range()
  Handle unaligned memory address for xor_range()
---
 disk-io.h |  1 +
 raid56.c  | 63 +++
 2 files changed, 64 insertions(+)

diff --git a/disk-io.h b/disk-io.h
index 440baa9..9fc7e92 100644
--- a/disk-io.h
+++ b/disk-io.h
@@ -192,5 +192,6 @@ int write_and_map_eb(struct btrfs_trans_handle *trans, 
struct btrfs_root *root,
 
 /* raid56.c */
 void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs);
+int raid5_gen_result(int nr_devs, size_t stripe_len, int dest, void **data);
 
 #endif
diff --git a/raid56.c b/raid56.c
index 833df5f..8c79c45 100644
--- a/raid56.c
+++ b/raid56.c
@@ -26,6 +26,8 @@
 #include "kerncompat.h"
 #include "ctree.h"
 #include "disk-io.h"
+#include "volumes.h"
+#include "utils.h"
 
 /*
  * This is the C data type to use
@@ -107,3 +109,64 @@ void raid6_gen_syndrome(int disks, size_t bytes, void 
**ptrs)
}
 }
 
+static void xor_range(char *dst, const char*src, size_t size)
+{
+   /* Move to DWORD aligned */
+   while (size && ((unsigned long)dst & sizeof(unsigned long))) {
+   *dst++ ^= *src++;
+   size--;
+   }
+
+   /* DWORD aligned part */
+   while (size >= sizeof(unsigned long)) {
+   *(unsigned long *)dst ^= *(unsigned long *)src;
+   src += sizeof(unsigned long);
+   dst += sizeof(unsigned long);
+   size -= sizeof(unsigned long);
+   }
+   /* Remaining */
+   while (size) {
+   *dst++ ^= *src++;
+   size--;
+   }
+}
+
+/*
+ * Generate desired data/parity stripe for RAID5
+ *
+ * @nr_devs:   Total number of devices, including parity
+ * @stripe_len:Stripe length
+ * @data:  Data, with special layout:
+ * data[0]: Data stripe 0
+ * data[nr_devs-2]: Last data stripe
+ * data[nr_devs-1]: RAID5 parity
+ * @dest:  To generate which data. should follow above data layout
+ */
+int raid5_gen_result(int nr_devs, size_t stripe_len, int dest, void **data)
+{
+   int i;
+   char *buf = data[dest];
+
+   /* Validation check */
+   if (stripe_len <= 0 || stripe_len != BTRFS_STRIPE_LEN) {
+   error("invalid parameter for %s", __func__);
+   return -EINVAL;
+   }
+
+   if (dest >= nr_devs || nr_devs < 2) {
+   error("invalid parameter for %s", __func__);
+   return -EINVAL;
+   }
+   /* Shortcut for 2 devs RAID5, which is just RAID1 */
+   if (nr_devs == 2) {
+   memcpy(data[dest], data[1 - dest], stripe_len);
+   return 0;
+   }
+   memset(buf, 0, stripe_len);
+   for (i = 0; i < nr_devs; i++) {
+   if (i == dest)
+   continue;
+   xor_range(buf, data[i], stripe_len);
+   }
+   return 0;
+}
-- 
2.10.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/4] btrfs-progs: rename raid6.c to raid56.c

2016-09-29 Thread Qu Wenruo
This allows us to put raid5 codes into that file other than creating a
new raid5.c.

Signed-off-by: Qu Wenruo 
---
v2:
  Split patch
---
 Makefile.in | 2 +-
 disk-io.h   | 2 +-
 raid6.c => raid56.c | 0
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename raid6.c => raid56.c (100%)

diff --git a/Makefile.in b/Makefile.in
index 20b740a..0ae2cd5 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -90,7 +90,7 @@ CHECKER_FLAGS := -include $(check_defs) -D__CHECKER__ \
 objects = ctree.o disk-io.o kernel-lib/radix-tree.o extent-tree.o print-tree.o 
\
  root-tree.o dir-item.o file-item.o inode-item.o inode-map.o \
  extent-cache.o extent_io.o volumes.o utils.o repair.o \
- qgroup.o raid6.o free-space-cache.o kernel-lib/list_sort.o props.o \
+ qgroup.o raid56.o free-space-cache.o kernel-lib/list_sort.o props.o \
  ulist.o qgroup-verify.o backref.o string-table.o task-utils.o \
  inode.o file.o find-root.o free-space-tree.o help.o
 cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \
diff --git a/disk-io.h b/disk-io.h
index 1080fc1..440baa9 100644
--- a/disk-io.h
+++ b/disk-io.h
@@ -190,7 +190,7 @@ int write_tree_block(struct btrfs_trans_handle *trans,
 int write_and_map_eb(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 struct extent_buffer *eb);
 
-/* raid6.c */
+/* raid56.c */
 void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs);
 
 #endif
diff --git a/raid6.c b/raid56.c
similarity index 100%
rename from raid6.c
rename to raid56.c
-- 
2.10.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/4] btrfs-progs: volumes: Remove BUG_ON in raid56 write routine

2016-09-29 Thread Qu Wenruo
Remove various BUG_ON in raid56 write routine, including:
1) Memory allocation error
2) Write error
3) Validation check

Signed-off-by: Qu Wenruo 
---
v2:
  Split patch
  Remove all BUG_ON() in raid56 write routine.
---
 volumes.c | 84 +--
 1 file changed, 49 insertions(+), 35 deletions(-)

diff --git a/volumes.c b/volumes.c
index da79751..93ce934 100644
--- a/volumes.c
+++ b/volumes.c
@@ -2061,8 +2061,8 @@ static int rmw_eb(struct btrfs_fs_info *info,
return 0;
 }
 
-static void split_eb_for_raid56(struct btrfs_fs_info *info,
-   struct extent_buffer *orig_eb,
+static int split_eb_for_raid56(struct btrfs_fs_info *info,
+  struct extent_buffer *orig_eb,
   struct extent_buffer **ebs,
   u64 stripe_len, u64 *raid_map,
   int num_stripes)
@@ -2071,34 +2071,38 @@ static void split_eb_for_raid56(struct btrfs_fs_info 
*info,
u64 start = orig_eb->start;
u64 this_eb_start;
int i;
-   int ret;
+   int ret = 0;
 
+   eb = calloc(num_stripes, sizeof(*eb) + stripe_len);
+   if (!eb)
+   return -ENOMEM;
for (i = 0; i < num_stripes; i++) {
if (raid_map[i] >= BTRFS_RAID5_P_STRIPE)
break;
 
-   eb = calloc(1, sizeof(struct extent_buffer) + stripe_len);
-   if (!eb)
-   BUG();
-
-   eb->start = raid_map[i];
-   eb->len = stripe_len;
-   eb->refs = 1;
-   eb->flags = 0;
-   eb->fd = -1;
-   eb->dev_bytenr = (u64)-1;
+   eb[i].start = raid_map[i];
+   eb[i].len = stripe_len;
+   eb[i].refs = 1;
+   eb[i].flags = 0;
+   eb[i].fd = -1;
+   eb[i].dev_bytenr = (u64)-1;
 
this_eb_start = raid_map[i];
 
if (start > this_eb_start ||
start + orig_eb->len < this_eb_start + stripe_len) {
ret = rmw_eb(info, eb, orig_eb);
-   BUG_ON(ret);
+   if (ret)
+   break;
} else {
-   memcpy(eb->data, orig_eb->data + eb->start - start, 
stripe_len);
+   memcpy(eb->data, orig_eb->data + eb->start - start,
+  stripe_len);
}
-   ebs[i] = eb;
+   ebs[i] = [i];
}
+   if (ret)
+   free(eb);
+   return ret;
 }
 
 int write_raid56_with_parity(struct btrfs_fs_info *info,
@@ -2111,15 +2115,20 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
int j;
int ret;
int alloc_size = eb->len;
+   void **pointers;
 
-   ebs = kmalloc(sizeof(*ebs) * multi->num_stripes, GFP_NOFS);
-   BUG_ON(!ebs);
+   ebs = malloc(sizeof(*ebs) * multi->num_stripes);
+   pointers = malloc(sizeof(*pointers) * multi->num_stripes);
+   if (!ebs || !pointers)
+   return -ENOMEM;
 
if (stripe_len > alloc_size)
alloc_size = stripe_len;
 
-   split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map,
-   multi->num_stripes);
+   ret = split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map,
+ multi->num_stripes);
+   if (ret)
+   goto out;
 
for (i = 0; i < multi->num_stripes; i++) {
struct extent_buffer *new_eb;
@@ -2127,11 +2136,17 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
ebs[i]->dev_bytenr = multi->stripes[i].physical;
ebs[i]->fd = multi->stripes[i].dev->fd;
multi->stripes[i].dev->total_ios++;
-   BUG_ON(ebs[i]->start != raid_map[i]);
+   if (ebs[i]->start != raid_map[i]) {
+   ret = -EINVAL;
+   goto out_free_split;
+   }
continue;
}
-   new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS);
-   BUG_ON(!new_eb);
+   new_eb = malloc(sizeof(*eb) + alloc_size);
+   if (!new_eb) {
+   ret = -ENOMEM;
+   goto out_free_split;
+   }
new_eb->dev_bytenr = multi->stripes[i].physical;
new_eb->fd = multi->stripes[i].dev->fd;
multi->stripes[i].dev->total_ios++;
@@ -2143,12 +2158,6 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
q_eb = new_eb;
}
if (q_eb) {
-   void **pointers;
-
-   pointers = kmalloc(sizeof(*pointers) * multi->num_stripes,
-

Re: [PATCH] qgroup: Prevent qgroup->reserved from going subzero

2016-09-29 Thread Qu Wenruo



At 09/30/2016 10:18 AM, Qu Wenruo wrote:

Your original fix is good.

Feel free to my tag after enriching the comment in btrfs_invalidatepage().

Feel free to "add" my tag...


Reviewed-by: Qu Wenruo 

Thanks again for your founding and sorry for the extra time reviewing.

for your "finding"...

My English just sucks


Thanks
Qu


At 09/29/2016 07:13 PM, Goldwyn Rodrigues wrote:



On 09/29/2016 03:57 AM, Qu Wenruo wrote:

Thanks for your test script.

I succeeded in pinning down the problem.

The problem is, btrfs is invalidate pages that belongs to ordered
extent(goes through writeback)



No, I don't think invalidated pages are going through writeback. The
problem is that the space for extent allocation is done before the
writeback and if pages are invalidated before writeback, it is not
accounted for.



The ftrace(with more tracepoints to trace qgroup->reserved change)
shows:
--
btrfs_qgroup_release_data: root=257, ino=257, start=0, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16171008, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=4096, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16166912, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=8192, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16162816, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=12288, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16158720, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=16384, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16154624, diff = -4096


This is a little confusing. There is no qgroup_update_reserve() function
in the source. Where did you add this?




^ Btrfs invalidates 5 pages of ino 257 qg 257, range 0 ~ 20K.

btrfs_qgroup_release_data: root=257, ino=257, start=0, len=5242880,
reserved=5222400, op=release

^ Here ordered_io finishes, write the whole 5M data, while
QGROUP_RESERVED only returned 5M - 20K.
--

The problem is at btrfs_add_delayed_data_ref(), we use the assumption
that, qgroup reserved space is the same as extent size(5M)
But in fact, btrfs_qgroup_release_data() only release (5M - 20K).
Leaking the 20K.


Yes, this is more like it. However, I would put it the other way. It
releases 5M when it should have released 5M-20K, because 20k was
released when invalidating page.



Calltrace:
btrfs_finish_ordered_io()
|- inserted_reserved_file_extent()
   |- btrfs_alloc_reserved_file_extent()
   |  ^^ Here we tell delayed_ref to free 5M later
   |- btrfs_qgroup_release_data()
  ^^ We only released (5M - 20K), as the first 5 pages
 are freed by btrfs_invalidatepage()

If btrfs is designed to freeing pages which belongs to ordered_extent,
then it totally breaks the qgroup assumption.

Then we have several ways to fix it:
1) Fix race between ordered extent(writeback) with invalidatepage()
   Make btrfs_invalidatepage() skips pages that are already in
   ordered_extent, then we're OK.

   This is much like your original fix, but I'm not sure if DIRTY page
   flag is bond to ordered_extent.
   And more comment will help.



So, what you mean is that fix is correct, I just need to reword the
patch header.



2) Make btrfs_qgroup_release_data() return accurate num bytes
   And then pass it to delayed_ref head.

   This is quite anti-intuition. If we're adding a new extent, how
   could it happen that some pages are already invalidated?



I agree. It is counter-intutive and will increase complexity.


Anyway, awesome work, exposed a quite strange race and makes us re-think
the base of qgroup reserve space framework.


Thanks.




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] qgroup: Prevent qgroup->reserved from going subzero

2016-09-29 Thread Qu Wenruo

Your original fix is good.

Feel free to my tag after enriching the comment in btrfs_invalidatepage().

Reviewed-by: Qu Wenruo 

Thanks again for your founding and sorry for the extra time reviewing.

Thanks
Qu


At 09/29/2016 07:13 PM, Goldwyn Rodrigues wrote:



On 09/29/2016 03:57 AM, Qu Wenruo wrote:

Thanks for your test script.

I succeeded in pinning down the problem.

The problem is, btrfs is invalidate pages that belongs to ordered
extent(goes through writeback)



No, I don't think invalidated pages are going through writeback. The
problem is that the space for extent allocation is done before the
writeback and if pages are invalidated before writeback, it is not
accounted for.



The ftrace(with more tracepoints to trace qgroup->reserved change) shows:
--
btrfs_qgroup_release_data: root=257, ino=257, start=0, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16171008, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=4096, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16166912, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=8192, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16162816, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=12288, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16158720, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=16384, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16154624, diff = -4096


This is a little confusing. There is no qgroup_update_reserve() function
in the source. Where did you add this?




^ Btrfs invalidates 5 pages of ino 257 qg 257, range 0 ~ 20K.

btrfs_qgroup_release_data: root=257, ino=257, start=0, len=5242880,
reserved=5222400, op=release

^ Here ordered_io finishes, write the whole 5M data, while
QGROUP_RESERVED only returned 5M - 20K.
--

The problem is at btrfs_add_delayed_data_ref(), we use the assumption
that, qgroup reserved space is the same as extent size(5M)
But in fact, btrfs_qgroup_release_data() only release (5M - 20K).
Leaking the 20K.


Yes, this is more like it. However, I would put it the other way. It
releases 5M when it should have released 5M-20K, because 20k was
released when invalidating page.



Calltrace:
btrfs_finish_ordered_io()
|- inserted_reserved_file_extent()
   |- btrfs_alloc_reserved_file_extent()
   |  ^^ Here we tell delayed_ref to free 5M later
   |- btrfs_qgroup_release_data()
  ^^ We only released (5M - 20K), as the first 5 pages
 are freed by btrfs_invalidatepage()

If btrfs is designed to freeing pages which belongs to ordered_extent,
then it totally breaks the qgroup assumption.

Then we have several ways to fix it:
1) Fix race between ordered extent(writeback) with invalidatepage()
   Make btrfs_invalidatepage() skips pages that are already in
   ordered_extent, then we're OK.

   This is much like your original fix, but I'm not sure if DIRTY page
   flag is bond to ordered_extent.
   And more comment will help.



So, what you mean is that fix is correct, I just need to reword the
patch header.



2) Make btrfs_qgroup_release_data() return accurate num bytes
   And then pass it to delayed_ref head.

   This is quite anti-intuition. If we're adding a new extent, how
   could it happen that some pages are already invalidated?



I agree. It is counter-intutive and will increase complexity.


Anyway, awesome work, exposed a quite strange race and makes us re-think
the base of qgroup reserve space framework.


Thanks.




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] qgroup: Prevent qgroup->reserved from going subzero

2016-09-29 Thread Qu Wenruo



At 09/29/2016 07:13 PM, Goldwyn Rodrigues wrote:



On 09/29/2016 03:57 AM, Qu Wenruo wrote:

Thanks for your test script.

I succeeded in pinning down the problem.

The problem is, btrfs is invalidate pages that belongs to ordered
extent(goes through writeback)



No, I don't think invalidated pages are going through writeback. The
problem is that the space for extent allocation is done before the
writeback and if pages are invalidated before writeback, it is not
accounted for.



The ftrace(with more tracepoints to trace qgroup->reserved change) shows:
--
btrfs_qgroup_release_data: root=257, ino=257, start=0, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16171008, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=4096, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16166912, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=8192, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16162816, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=12288, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16158720, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=16384, len=4096,
reserved=4096, op=free
qgroup_update_reserve: qgid = 257, cur_reserved = 16154624, diff = -4096


This is a little confusing. There is no qgroup_update_reserve() function
in the source. Where did you add this?


Sorry, I just submitted them few minutes ago.

Have CCed you.






^ Btrfs invalidates 5 pages of ino 257 qg 257, range 0 ~ 20K.

btrfs_qgroup_release_data: root=257, ino=257, start=0, len=5242880,
reserved=5222400, op=release

^ Here ordered_io finishes, write the whole 5M data, while
QGROUP_RESERVED only returned 5M - 20K.
--

The problem is at btrfs_add_delayed_data_ref(), we use the assumption
that, qgroup reserved space is the same as extent size(5M)
But in fact, btrfs_qgroup_release_data() only release (5M - 20K).
Leaking the 20K.


Yes, this is more like it. However, I would put it the other way. It
releases 5M when it should have released 5M-20K, because 20k was
released when invalidating page.



Calltrace:
btrfs_finish_ordered_io()
|- inserted_reserved_file_extent()
   |- btrfs_alloc_reserved_file_extent()
   |  ^^ Here we tell delayed_ref to free 5M later
   |- btrfs_qgroup_release_data()
  ^^ We only released (5M - 20K), as the first 5 pages
 are freed by btrfs_invalidatepage()

If btrfs is designed to freeing pages which belongs to ordered_extent,
then it totally breaks the qgroup assumption.

Then we have several ways to fix it:
1) Fix race between ordered extent(writeback) with invalidatepage()
   Make btrfs_invalidatepage() skips pages that are already in
   ordered_extent, then we're OK.

   This is much like your original fix, but I'm not sure if DIRTY page
   flag is bond to ordered_extent.
   And more comment will help.



So, what you mean is that fix is correct, I just need to reword the
patch header.


Not patch header, but comment before PageDirty() check.

I'm still digging the code to see if page DIRTY check is safe and if 
there is more elegant fix.


Thanks,
Qu





2) Make btrfs_qgroup_release_data() return accurate num bytes
   And then pass it to delayed_ref head.

   This is quite anti-intuition. If we're adding a new extent, how
   could it happen that some pages are already invalidated?



I agree. It is counter-intutive and will increase complexity.


Anyway, awesome work, exposed a quite strange race and makes us re-think
the base of qgroup reserve space framework.


Thanks.




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] btrfs: Add trace point for qgroup reserved space

2016-09-29 Thread Qu Wenruo
Introduce the following trace points:
qgroup_update_reserve
qgroup_meta_reserve

And modify the timing of btrfs_qgroup_free_delayed_ref() and
btrfs_qgroup_release_data() events, to work with qgroup_update_reserve()
event.

Signed-off-by: Qu Wenruo 
---
 fs/btrfs/qgroup.c| 21 ++---
 fs/btrfs/qgroup.h|  2 +-
 include/trace/events/btrfs.h | 43 +++
 3 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 8532587..593a519 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1062,6 +1062,8 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info 
*fs_info,
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
if (sign > 0) {
+   trace_qgroup_update_reserve(fs_info, qgroup->qgroupid,
+   qgroup->reserved, (s64)-num_bytes);
if (WARN_ON(qgroup->reserved < num_bytes))
qgroup->reserved = 0;
else
@@ -1087,6 +1089,9 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info 
*fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
if (sign > 0) {
+   trace_qgroup_update_reserve(fs_info, qgroup->qgroupid,
+   qgroup->reserved,
+   (s64)-num_bytes);
if (WARN_ON(qgroup->reserved < num_bytes))
qgroup->reserved = 0;
else
@@ -2164,6 +2169,8 @@ static int qgroup_reserve(struct btrfs_root *root, u64 
num_bytes)
 
qg = u64_to_ptr(unode->aux);
 
+   trace_qgroup_update_reserve(fs_info, qg->qgroupid,
+   qg->reserved, num_bytes);
qg->reserved += num_bytes;
}
 
@@ -2209,6 +2216,8 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info 
*fs_info,
 
qg = u64_to_ptr(unode->aux);
 
+   trace_qgroup_update_reserve(fs_info, qg->qgroupid,
+   qg->reserved, (s64)-num_bytes);
if (WARN_ON(qgroup->reserved < num_bytes))
qgroup->reserved = 0;
else
@@ -2637,12 +2646,12 @@ static int __btrfs_qgroup_release_data(struct inode 
*inode, u64 start, u64 len,
if (ret < 0)
goto out;
 
-   if (free) {
-   qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+   if (free)
trace_op = QGROUP_FREE;
-   }
trace_btrfs_qgroup_release_data(inode, start, len,
changeset.bytes_changed, trace_op);
+   if (free)
+   qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
 out:
ulist_free(changeset.range_changed);
return ret;
@@ -2692,6 +2701,8 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, 
int num_bytes)
return 0;
 
BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));
+   trace_qgroup_meta_reserve(root->fs_info, root->objectid,
+ (s64)num_bytes);
ret = qgroup_reserve(root, num_bytes);
if (ret < 0)
return ret;
@@ -2709,6 +2720,8 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
reserved = atomic_xchg(>qgroup_meta_rsv, 0);
if (reserved == 0)
return;
+   trace_qgroup_meta_reserve(root->fs_info, root->objectid,
+ (s64)-reserved);
qgroup_free(root, reserved);
 }
 
@@ -2720,6 +2733,8 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int 
num_bytes)
BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));
WARN_ON(atomic_read(>qgroup_meta_rsv) < num_bytes);
atomic_sub(num_bytes, >qgroup_meta_rsv);
+   trace_qgroup_meta_reserve(root->fs_info, root->objectid,
+ (s64)-num_bytes);
qgroup_free(root, num_bytes);
 }
 
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 1bc64c8..6b6756c 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -114,8 +114,8 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info 
*fs_info,
 static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
 u64 ref_root, u64 num_bytes)
 {
-   btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
+   btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
 }
 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
 
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index e030d6f..fb3cb6c 100644
--- 

[PATCH 1/2] btrfs: Add WARN_ON for qgroup reserved underflow

2016-09-29 Thread Qu Wenruo
While the reason why qgroup reserved space may underflow is still under
investigation, such WARN_ON will help us to expose the bug more easily,
and for end-user we can detect and avoid underflow.

Signed-off-by: Qu Wenruo 
---
 fs/btrfs/qgroup.c | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 8db2e29..8532587 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1061,8 +1061,12 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info 
*fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
-   if (sign > 0)
-   qgroup->reserved -= num_bytes;
+   if (sign > 0) {
+   if (WARN_ON(qgroup->reserved < num_bytes))
+   qgroup->reserved = 0;
+   else
+   qgroup->reserved -= num_bytes;
+   }
 
qgroup_dirty(fs_info, qgroup);
 
@@ -1082,8 +1086,12 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info 
*fs_info,
qgroup->rfer_cmpr += sign * num_bytes;
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
-   if (sign > 0)
-   qgroup->reserved -= num_bytes;
+   if (sign > 0) {
+   if (WARN_ON(qgroup->reserved < num_bytes))
+   qgroup->reserved = 0;
+   else
+   qgroup->reserved -= num_bytes;
+   }
qgroup->excl_cmpr += sign * num_bytes;
qgroup_dirty(fs_info, qgroup);
 
@@ -2201,7 +2209,10 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info 
*fs_info,
 
qg = u64_to_ptr(unode->aux);
 
-   qg->reserved -= num_bytes;
+   if (WARN_ON(qgroup->reserved < num_bytes))
+   qgroup->reserved = 0;
+   else
+   qgroup->reserved -= num_bytes;
 
list_for_each_entry(glist, >groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
-- 
2.10.0



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH] btrfs-progs: move sources shared with kernel to own directory

2016-09-29 Thread Qu Wenruo



At 09/30/2016 12:38 AM, David Sterba wrote:

Hi,

I've started to reorganize the souces in progs so we can split the files and
group them. At the moment there's the kernel library (crc, list, radix, ...) as
the files are almost exactly the same as provided by kernel.

This patch moves btrfs-specific sources to own directory. As this is a bigger
change I could use some feedback if there are better alternatives (*). The
makefile is still global, object files are put next to the sources.


+1 for global makefile. We don't need to bother several makefile, that's 
nice.




To current state and plans to continue the split:

- kernel-lib/ - as described in the first paragraph


Already using devel branch. Seems good to me.


- kernel-shared/  - dtto, second
- cmds/   - (planned) cmds-*.[ch]


Sounds good, so we can skip the "cmds-" prefix then.


- convert/- move as-is, split later, eg. per filsystem
- image/  - move as-is, split if needed
- check/  - split from cmds-check


Pretty cool.
I'm also moving some code into its own dir doing the user-space scrub work.
(While I'm creating a new dir called fsck/ and put my scrub code into 
it, rename it to check/ won't be a problem though)


While I'm not sure what is going to be in kernel-shared/ dir.

Uapi headers? or things like disk-io.c volumes.c?

For uapi headers, then kernel-shared/ may contain few files.
For disk-io.c/volumes.c, they are not shared with kernel anymore.

Thanks,
Qu



There will be space for code shared potentially by any of the above, that
should remain in the toplevel directory, we'll see later how to organize.  The
remaining standalone tools shall remain in the toplevel directory.

The target for kernel-shared split is 4.9, 4.8 will have the kernel-lib split.

Preview can be found in my devel git repos in branch preview/move-shared .

Thanks for feedback.

(*) no automake please

-->8---8<--
From: David Sterba 

All files that share content and purpose with the corresponding file in
the kernel are moved to a separate directory. There's only some overlap,
can't be 100% match but many functions are shared. A diff between the
files can reveal changes that should be applied to the other codebase.

Signed-off-by: David Sterba 
---
 Makefile.in| 18 +++---
 backref.c => kernel-shared/backref.c   |  0
 backref.h => kernel-shared/backref.h   |  0
 ctree.c => kernel-shared/ctree.c   |  0
 ctree.h => kernel-shared/ctree.h   |  0
 dir-item.c => kernel-shared/dir-item.c |  0
 disk-io.c => kernel-shared/disk-io.c   |  0
 disk-io.h => kernel-shared/disk-io.h   |  0
 extent-tree.c => kernel-shared/extent-tree.c   |  0
 extent_io.c => kernel-shared/extent_io.c   |  0
 extent_io.h => kernel-shared/extent_io.h   |  0
 file-item.c => kernel-shared/file-item.c   |  0
 file.c => kernel-shared/file.c |  0
 free-space-cache.c => kernel-shared/free-space-cache.c |  0
 free-space-cache.h => kernel-shared/free-space-cache.h |  0
 free-space-tree.c => kernel-shared/free-space-tree.c   |  0
 free-space-tree.h => kernel-shared/free-space-tree.h   |  0
 inode-item.c => kernel-shared/inode-item.c |  0
 inode-map.c => kernel-shared/inode-map.c   |  0
 inode.c => kernel-shared/inode.c   |  0
 print-tree.c => kernel-shared/print-tree.c |  0
 print-tree.h => kernel-shared/print-tree.h |  0
 root-tree.c => kernel-shared/root-tree.c   |  0
 ulist.c => kernel-shared/ulist.c   |  0
 ulist.h => kernel-shared/ulist.h   |  0
 volumes.c => kernel-shared/volumes.c   |  0
 volumes.h => kernel-shared/volumes.h   |  0
 27 files changed, 11 insertions(+), 7 deletions(-)
 rename backref.c => kernel-shared/backref.c (100%)
 rename backref.h => kernel-shared/backref.h (100%)
 rename ctree.c => kernel-shared/ctree.c (100%)
 rename ctree.h => kernel-shared/ctree.h (100%)
 rename dir-item.c => kernel-shared/dir-item.c (100%)
 rename disk-io.c => kernel-shared/disk-io.c (100%)
 rename disk-io.h => kernel-shared/disk-io.h (100%)
 rename extent-tree.c => kernel-shared/extent-tree.c (100%)
 rename extent_io.c => kernel-shared/extent_io.c (100%)
 rename extent_io.h => kernel-shared/extent_io.h (100%)
 rename file-item.c => kernel-shared/file-item.c (100%)
 rename file.c => kernel-shared/file.c (100%)
 rename free-space-cache.c => kernel-shared/free-space-cache.c (100%)
 rename free-space-cache.h => kernel-shared/free-space-cache.h (100%)
 rename free-space-tree.c => kernel-shared/free-space-tree.c (100%)
 rename free-space-tree.h => kernel-shared/free-space-tree.h (100%)
 rename inode-item.c => kernel-shared/inode-item.c (100%)
 

Re: [PATCH 1/2] btrfs-progs: raid56: Add support for raid5 to calculate any stripe

2016-09-29 Thread Qu Wenruo



At 09/30/2016 01:37 AM, David Sterba wrote:

On Wed, Sep 28, 2016 at 04:30:03PM +0800, Qu Wenruo wrote:

Add a new function raid5_gen_result() to calculate raid5 parity or
recover data stripe.

Since now that raid6.c handles both raid5 and raid6, rename it to
raid56.c.


Please split changes in this patch to the following:
- rename the file
- error handling of memory allocation failures
- the actual fix

A test would be very velcome, for all the cases the code handles, 2
devices, and more. But I'm not sure if we have support for that in the
testing suite.


Makes sense.

I'll split first and try if I can create some test cases for RAID5/6 codes.

But the later work may be delayed since I'm working on user-space scrub.
(Which will lead to kernel scrub fix).

Thanks,
Qu




@@ -107,3 +108,47 @@ void raid6_gen_syndrome(int disks, size_t bytes, void 
**ptrs)
}
 }

+static void xor_range(void *src, void *dst, size_t size)
+{
+   while (size) {
+   *(unsigned long *) dst ^= *(unsigned long *) src;


This could lead to unaligned access, please update the types and
possibly add some sanity checks (alignemnt, length).


+   src += sizeof(unsigned long);
+   dst += sizeof(unsigned long);
+   size -= sizeof(unsigned long);
+   }
+}
+
+/*
+ * Generate desired data/parity for RAID5
+ *
+ * @nr_devs:   Total number of devices, including parity
+ * @stripe_len:Stripe length
+ * @data:  Data, with special layout:
+ * data[0]: Data stripe 0
+ * data[nr_devs-2]: Last data stripe
+ * data[nr_devs-1]: RAID5 parity
+ * @dest:  To generate which data. should follow above data layout
+ */
+int raid5_gen_result(int nr_devs, size_t stripe_len, int dest, void **data)
+{
+   int i;
+   char *buf = data[dest];
+
+   if (dest >= nr_devs || nr_devs < 2) {
+   error("invalid parameter for %s", __func__);
+   return -EINVAL;
+   }
+   /* Quich hack, 2 devs RAID5 is just RAID1, no need to calculate */


This is not a hack IMO, it could be a shortcut, a special case, an
optimization.


+   if (nr_devs == 2) {
+   memcpy(data[dest], data[1 - dest], stripe_len);
+   return 0;
+   }
+   /* Just in case */


Such comment is not very helpful.


+   memset(buf, 0, stripe_len);
+   for (i = 0; i < nr_devs; i++) {
+   if (i == dest)
+   continue;
+   xor_range(data[i], buf, stripe_len);
+   }
+   return 0;
+}
diff --git a/volumes.c b/volumes.c
index da79751..718e67c 100644
--- a/volumes.c
+++ b/volumes.c
@@ -2108,12 +2108,14 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
 {
struct extent_buffer **ebs, *p_eb = NULL, *q_eb = NULL;
int i;
-   int j;
int ret;
int alloc_size = eb->len;
+   void **pointers;


I see you're moving existing code, so if you're going to fix the types,
please do that in a separate patch as well.


-   ebs = kmalloc(sizeof(*ebs) * multi->num_stripes, GFP_NOFS);
-   BUG_ON(!ebs);
+   ebs = malloc(sizeof(*ebs) * multi->num_stripes);
+   pointers = malloc(sizeof(void *) * multi->num_stripes);
+   if (!ebs || !pointers)
+   return -ENOMEM;

if (stripe_len > alloc_size)
alloc_size = stripe_len;
@@ -2143,12 +2145,6 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
q_eb = new_eb;
}
if (q_eb) {
-   void **pointers;
-
-   pointers = kmalloc(sizeof(*pointers) * multi->num_stripes,
-  GFP_NOFS);
-   BUG_ON(!pointers);
-
ebs[multi->num_stripes - 2] = p_eb;
ebs[multi->num_stripes - 1] = q_eb;

@@ -2159,17 +2155,14 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
kfree(pointers);
} else {
ebs[multi->num_stripes - 1] = p_eb;
-   memcpy(p_eb->data, ebs[0]->data, stripe_len);
-   for (j = 1; j < multi->num_stripes - 1; j++) {
-   for (i = 0; i < stripe_len; i += sizeof(u64)) {
-   u64 p_eb_data;
-   u64 ebs_data;
-
-   p_eb_data = get_unaligned_64(p_eb->data + i);
-   ebs_data = get_unaligned_64(ebs[j]->data + i);
-   p_eb_data ^= ebs_data;
-   put_unaligned_64(p_eb_data, p_eb->data + i);
-   }
+   for (i = 0; i < multi->num_stripes; i++)
+   pointers[i] = ebs[i]->data;
+   ret = raid5_gen_result(multi->num_stripes, stripe_len,
+   multi->num_stripes - 1, pointers);
+   if (ret < 0) {
+   free(ebs);
+   free(pointers);
+

Re: [PATCH] btrfs-progs: qgroup: Fix regression leads to corrupted qgroup status

2016-09-29 Thread Qu Wenruo



At 09/30/2016 01:19 AM, David Sterba wrote:

On Wed, Sep 07, 2016 at 10:54:19AM +0800, Qu Wenruo wrote:

Commit 93dabf211d74daf6e3de642bdd887a90a00f7b49
Author: Mark Fasheh 
Date:   Fri Jun 17 13:37:48 2016 -0700

btrfs-progs: check: verify qgroups above level 0

This commit introduced a new regression which corrupts
read_qgroup_status, since it iterate leaf with manually specified slot,
not correct path->slot[0].

This leads to wrong slot[0] and read_qgroup_status() will read out wrong
flags, leading to regression.

Fix read_qgroup_status() by using eb and slot instread of wrong path
strucutre.

Reported-by: Tsutomu Itoh 
Cc: Mark Fasheh 
Signed-off-by: Qu Wenruo 


I'm adding this patch to devel.  Do you have a test for the regression please?



Xfstests btrfs/114 can produce it.

If you mean to add btrfs-progs test case, then I can try to create a 
minimal image to reproduce it.


Thanks,
Qu


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PULL] Btrfs, for 4.9, part 1

2016-09-29 Thread Chris Mason

On Thu, Sep 29, 2016 at 01:36:46PM +0200, David Sterba wrote:

Hi,

I'm sending the first batch for 4.9, mostly fixes, stability improvements and
cleanups. There might be a second pull if the patches get reviewed, anything
space handling related or qgroups.
Please pull, thanks.


Thanks Dave, running this through tests.

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] btrfs-progs: raid56: Add support for raid5 to calculate any stripe

2016-09-29 Thread David Sterba
On Wed, Sep 28, 2016 at 04:30:03PM +0800, Qu Wenruo wrote:
> Add a new function raid5_gen_result() to calculate raid5 parity or
> recover data stripe.
> 
> Since now that raid6.c handles both raid5 and raid6, rename it to
> raid56.c.

Please split changes in this patch to the following:
- rename the file
- error handling of memory allocation failures
- the actual fix

A test would be very velcome, for all the cases the code handles, 2
devices, and more. But I'm not sure if we have support for that in the
testing suite.

> @@ -107,3 +108,47 @@ void raid6_gen_syndrome(int disks, size_t bytes, void 
> **ptrs)
>   }
>  }
>  
> +static void xor_range(void *src, void *dst, size_t size)
> +{
> + while (size) {
> + *(unsigned long *) dst ^= *(unsigned long *) src;

This could lead to unaligned access, please update the types and
possibly add some sanity checks (alignemnt, length).

> + src += sizeof(unsigned long);
> + dst += sizeof(unsigned long);
> + size -= sizeof(unsigned long);
> + }
> +}
> +
> +/*
> + * Generate desired data/parity for RAID5
> + *
> + * @nr_devs: Total number of devices, including parity
> + * @stripe_len:  Stripe length
> + * @data:Data, with special layout:
> + *   data[0]: Data stripe 0
> + *   data[nr_devs-2]: Last data stripe
> + *   data[nr_devs-1]: RAID5 parity
> + * @dest:To generate which data. should follow above data layout
> + */
> +int raid5_gen_result(int nr_devs, size_t stripe_len, int dest, void **data)
> +{
> + int i;
> + char *buf = data[dest];
> +
> + if (dest >= nr_devs || nr_devs < 2) {
> + error("invalid parameter for %s", __func__);
> + return -EINVAL;
> + }
> + /* Quich hack, 2 devs RAID5 is just RAID1, no need to calculate */

This is not a hack IMO, it could be a shortcut, a special case, an
optimization.

> + if (nr_devs == 2) {
> + memcpy(data[dest], data[1 - dest], stripe_len);
> + return 0;
> + }
> + /* Just in case */

Such comment is not very helpful.

> + memset(buf, 0, stripe_len);
> + for (i = 0; i < nr_devs; i++) {
> + if (i == dest)
> + continue;
> + xor_range(data[i], buf, stripe_len);
> + }
> + return 0;
> +}
> diff --git a/volumes.c b/volumes.c
> index da79751..718e67c 100644
> --- a/volumes.c
> +++ b/volumes.c
> @@ -2108,12 +2108,14 @@ int write_raid56_with_parity(struct btrfs_fs_info 
> *info,
>  {
>   struct extent_buffer **ebs, *p_eb = NULL, *q_eb = NULL;
>   int i;
> - int j;
>   int ret;
>   int alloc_size = eb->len;
> + void **pointers;

I see you're moving existing code, so if you're going to fix the types,
please do that in a separate patch as well.

> - ebs = kmalloc(sizeof(*ebs) * multi->num_stripes, GFP_NOFS);
> - BUG_ON(!ebs);
> + ebs = malloc(sizeof(*ebs) * multi->num_stripes);
> + pointers = malloc(sizeof(void *) * multi->num_stripes);
> + if (!ebs || !pointers)
> + return -ENOMEM;
>  
>   if (stripe_len > alloc_size)
>   alloc_size = stripe_len;
> @@ -2143,12 +2145,6 @@ int write_raid56_with_parity(struct btrfs_fs_info 
> *info,
>   q_eb = new_eb;
>   }
>   if (q_eb) {
> - void **pointers;
> -
> - pointers = kmalloc(sizeof(*pointers) * multi->num_stripes,
> -GFP_NOFS);
> - BUG_ON(!pointers);
> -
>   ebs[multi->num_stripes - 2] = p_eb;
>   ebs[multi->num_stripes - 1] = q_eb;
>  
> @@ -2159,17 +2155,14 @@ int write_raid56_with_parity(struct btrfs_fs_info 
> *info,
>   kfree(pointers);
>   } else {
>   ebs[multi->num_stripes - 1] = p_eb;
> - memcpy(p_eb->data, ebs[0]->data, stripe_len);
> - for (j = 1; j < multi->num_stripes - 1; j++) {
> - for (i = 0; i < stripe_len; i += sizeof(u64)) {
> - u64 p_eb_data;
> - u64 ebs_data;
> -
> - p_eb_data = get_unaligned_64(p_eb->data + i);
> - ebs_data = get_unaligned_64(ebs[j]->data + i);
> - p_eb_data ^= ebs_data;
> - put_unaligned_64(p_eb_data, p_eb->data + i);
> - }
> + for (i = 0; i < multi->num_stripes; i++)
> + pointers[i] = ebs[i]->data;
> + ret = raid5_gen_result(multi->num_stripes, stripe_len,
> + multi->num_stripes - 1, pointers);
> + if (ret < 0) {
> + free(ebs);
> + free(pointers);
> + return ret;
>   }
>   }
>  
> @@ -2180,7 +2173,8 @@ int write_raid56_with_parity(struct btrfs_fs_info *info,
>   kfree(ebs[i]);
>   }
>  
> -  

Re: [PATCH 2/2] btrfs-progs: Remove unnecessary parameter to clear_extent_uptodate

2016-09-29 Thread David Sterba
On Wed, Sep 28, 2016 at 04:30:04PM +0800, Qu Wenruo wrote:
> Signed-off-by: Qu Wenruo 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: qgroup: Fix regression leads to corrupted qgroup status

2016-09-29 Thread David Sterba
On Wed, Sep 07, 2016 at 10:54:19AM +0800, Qu Wenruo wrote:
> Commit 93dabf211d74daf6e3de642bdd887a90a00f7b49
> Author: Mark Fasheh 
> Date:   Fri Jun 17 13:37:48 2016 -0700
> 
> btrfs-progs: check: verify qgroups above level 0
> 
> This commit introduced a new regression which corrupts
> read_qgroup_status, since it iterate leaf with manually specified slot,
> not correct path->slot[0].
> 
> This leads to wrong slot[0] and read_qgroup_status() will read out wrong
> flags, leading to regression.
> 
> Fix read_qgroup_status() by using eb and slot instread of wrong path
> strucutre.
> 
> Reported-by: Tsutomu Itoh 
> Cc: Mark Fasheh 
> Signed-off-by: Qu Wenruo 

I'm adding this patch to devel.  Do you have a test for the regression please?
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC][PATCH] btrfs-progs: move sources shared with kernel to own directory

2016-09-29 Thread David Sterba
Hi,

I've started to reorganize the souces in progs so we can split the files and
group them. At the moment there's the kernel library (crc, list, radix, ...) as
the files are almost exactly the same as provided by kernel.

This patch moves btrfs-specific sources to own directory. As this is a bigger
change I could use some feedback if there are better alternatives (*). The
makefile is still global, object files are put next to the sources.

To current state and plans to continue the split:

- kernel-lib/ - as described in the first paragraph
- kernel-shared/  - dtto, second
- cmds/   - (planned) cmds-*.[ch]
- convert/- move as-is, split later, eg. per filsystem
- image/  - move as-is, split if needed
- check/  - split from cmds-check

There will be space for code shared potentially by any of the above, that
should remain in the toplevel directory, we'll see later how to organize.  The
remaining standalone tools shall remain in the toplevel directory.

The target for kernel-shared split is 4.9, 4.8 will have the kernel-lib split.

Preview can be found in my devel git repos in branch preview/move-shared .

Thanks for feedback.

(*) no automake please

-->8---8<--
From: David Sterba 

All files that share content and purpose with the corresponding file in
the kernel are moved to a separate directory. There's only some overlap,
can't be 100% match but many functions are shared. A diff between the
files can reveal changes that should be applied to the other codebase.

Signed-off-by: David Sterba 
---
 Makefile.in| 18 +++---
 backref.c => kernel-shared/backref.c   |  0
 backref.h => kernel-shared/backref.h   |  0
 ctree.c => kernel-shared/ctree.c   |  0
 ctree.h => kernel-shared/ctree.h   |  0
 dir-item.c => kernel-shared/dir-item.c |  0
 disk-io.c => kernel-shared/disk-io.c   |  0
 disk-io.h => kernel-shared/disk-io.h   |  0
 extent-tree.c => kernel-shared/extent-tree.c   |  0
 extent_io.c => kernel-shared/extent_io.c   |  0
 extent_io.h => kernel-shared/extent_io.h   |  0
 file-item.c => kernel-shared/file-item.c   |  0
 file.c => kernel-shared/file.c |  0
 free-space-cache.c => kernel-shared/free-space-cache.c |  0
 free-space-cache.h => kernel-shared/free-space-cache.h |  0
 free-space-tree.c => kernel-shared/free-space-tree.c   |  0
 free-space-tree.h => kernel-shared/free-space-tree.h   |  0
 inode-item.c => kernel-shared/inode-item.c |  0
 inode-map.c => kernel-shared/inode-map.c   |  0
 inode.c => kernel-shared/inode.c   |  0
 print-tree.c => kernel-shared/print-tree.c |  0
 print-tree.h => kernel-shared/print-tree.h |  0
 root-tree.c => kernel-shared/root-tree.c   |  0
 ulist.c => kernel-shared/ulist.c   |  0
 ulist.h => kernel-shared/ulist.h   |  0
 volumes.c => kernel-shared/volumes.c   |  0
 volumes.h => kernel-shared/volumes.h   |  0
 27 files changed, 11 insertions(+), 7 deletions(-)
 rename backref.c => kernel-shared/backref.c (100%)
 rename backref.h => kernel-shared/backref.h (100%)
 rename ctree.c => kernel-shared/ctree.c (100%)
 rename ctree.h => kernel-shared/ctree.h (100%)
 rename dir-item.c => kernel-shared/dir-item.c (100%)
 rename disk-io.c => kernel-shared/disk-io.c (100%)
 rename disk-io.h => kernel-shared/disk-io.h (100%)
 rename extent-tree.c => kernel-shared/extent-tree.c (100%)
 rename extent_io.c => kernel-shared/extent_io.c (100%)
 rename extent_io.h => kernel-shared/extent_io.h (100%)
 rename file-item.c => kernel-shared/file-item.c (100%)
 rename file.c => kernel-shared/file.c (100%)
 rename free-space-cache.c => kernel-shared/free-space-cache.c (100%)
 rename free-space-cache.h => kernel-shared/free-space-cache.h (100%)
 rename free-space-tree.c => kernel-shared/free-space-tree.c (100%)
 rename free-space-tree.h => kernel-shared/free-space-tree.h (100%)
 rename inode-item.c => kernel-shared/inode-item.c (100%)
 rename inode-map.c => kernel-shared/inode-map.c (100%)
 rename inode.c => kernel-shared/inode.c (100%)
 rename print-tree.c => kernel-shared/print-tree.c (100%)
 rename print-tree.h => kernel-shared/print-tree.h (100%)
 rename root-tree.c => kernel-shared/root-tree.c (100%)
 rename ulist.c => kernel-shared/ulist.c (100%)
 rename ulist.h => kernel-shared/ulist.h (100%)
 rename volumes.c => kernel-shared/volumes.c (100%)
 rename volumes.h => kernel-shared/volumes.h (100%)

diff --git a/Makefile.in b/Makefile.in
index 20b740a0babd..2a08978f8b85 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -62,6 +62,7 @@ CFLAGS = @CFLAGS@ \
 -fPIC \
 -I$(TOPDIR) \
 -I$(TOPDIR)/kernel-lib \
+

Re: [PATCH v2 0/6] Btrfs: free space tree and sanity test fixes

2016-09-29 Thread David Sterba
On Thu, Sep 29, 2016 at 04:02:35PM +0300, Anatoly Pugachev wrote:
> > Try to add https://patchwork.kernel.org/patch/9332707/ aka
> > "Btrfs: improve check_node to avoid reading corrupted nodes" which should 
> > return
> > -EIO and prevent the BUG().
> 
> Guys,
> 
> I was wrong with "Btrfs: remove unnecessary btrfs_mark_buffer_dirty in
> split_leaf" , because I've used patch from email
> https://marc.info/?l=linux-btrfs=147319873029152=2 which patches
> fs/btrfs/ctree.c , but forgot to apply kdave git kernel commit
> 1ba98d086fe3a14d6a31f2f66dbab70c45d00f63 "Btrfs: detect corruption
> when non-root leaf has zero item"
> 
> :-/
> 
> Should I remake all the tests?

It would be much appreciated, as we probably won't get any other sparc64
testing. Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/6] Btrfs: free space tree and sanity test fixes

2016-09-29 Thread Anatoly Pugachev
On Thu, Sep 29, 2016 at 3:52 PM, Holger Hoffstätte
 wrote:
> On 09/29/16 14:21, Anatoly Pugachev wrote:
>>> ...
>>>
>>> This is fixed by patch
>>>
>>>   "Btrfs: remove unnecessary btrfs_mark_buffer_dirty in split_leaf"
>>>
>>> that's in the 4.9 queue. Other than that, the self-tests seem to pass,
>>> thanks for the test. Would be good if you can test with the mentioned
>>> patch included or without integrity checker. Thanks for testing.
>>
>> updated git kernel to v4.8-rc8-8-gae6dd8d , applied this
>> "Btrfs: free space tree and sanity test fixes" patchset and added/applied
>> "Btrfs: remove unnecessary btrfs_mark_buffer_dirty in split_leaf" :
>>
> (snip)
>> Sep 29 00:40:55 ttip kernel: BTRFS: device fsid 
>> 7bb81df9-0e2b-47f2-81ff-c08502d38da6 devid 1 transid 5 /dev/loop4
>> Sep 29 00:41:30 ttip kernel: BTRFS info (device loop4): disk space caching 
>> is enabled
>> Sep 29 00:41:30 ttip kernel: BTRFS info (device loop4): has skinny extents
>> Sep 29 00:41:30 ttip kernel: BTRFS info (device loop4): flagging fs with big 
>> metadata feature
>> Sep 29 00:41:30 ttip kernel: BTRFS info (device loop4): creating UUID tree
>> Sep 29 00:41:31 ttip kernel: BTRFS: device fsid 
>> d0ee7ca3-3be0-465f-857b-19e681181923 devid 1 transid 5 /dev/loop0
>> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): enabling free space 
>> tree
>> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): using free space tree
>> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): has skinny extents
>> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): flagging fs with big 
>> metadata feature
>> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): creating free space 
>> tree
>> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): setting 1 ro feature 
>> flag
>> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): setting 2 ro feature 
>> flag
>> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): creating UUID tree
>> Sep 29 00:41:32 ttip kernel: BTRFS critical (device loop4): corrupt leaf, 
>> non-root leaf's nritems is 0: block=29556736,root=1, slot=0
>> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop4): leaf 29556736 total 
>> ptrs 0 free space 16283
>> Sep 29 00:41:32 ttip kernel: BTRFS: assertion failed: 0, file: 
>> fs/btrfs/disk-io.c, line: 4059kernel BUG at fs/btrfs/ctree.h:3369!
>
> Try to add https://patchwork.kernel.org/patch/9332707/ aka
> "Btrfs: improve check_node to avoid reading corrupted nodes" which should 
> return
> -EIO and prevent the BUG().

Guys,

I was wrong with "Btrfs: remove unnecessary btrfs_mark_buffer_dirty in
split_leaf" , because I've used patch from email
https://marc.info/?l=linux-btrfs=147319873029152=2 which patches
fs/btrfs/ctree.c , but forgot to apply kdave git kernel commit
1ba98d086fe3a14d6a31f2f66dbab70c45d00f63 "Btrfs: detect corruption
when non-root leaf has zero item"

:-/

Should I remake all the tests?
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: multi-device btrfs with single data mode and disk failure

2016-09-29 Thread Alexandre Poux
Hi,

I finally did it : patched the kernel and removed the device.
As expected he did not scream since there was nothing at all on the device.
Now I'm checking that everything is fine:
scrub (in read only)
check (in read only)
but I think that everything will be OK
If not, I will rebuild the array from scratch (I did managed to save my
data)

Thank you both for your guidance.
I think that a warning should be put in the wiki in order for other user
to not do the same mistake I did :
never ever use the single mode

I will try to do it soon

Again thank you

Le 20/09/2016 à 23:15, Chris Murphy a écrit :
> On Tue, Sep 20, 2016 at 2:18 PM, Alexandre Poux  wrote:
>>
>> Le 20/09/2016 à 21:46, Chris Murphy a écrit :
>>> On Tue, Sep 20, 2016 at 1:31 PM, Alexandre Poux  wrote:
 Le 20/09/2016 à 21:11, Chris Murphy a écrit :
> And no backup? Umm, I'd resolve that sooner than anything else.
 Yeah you are absolutely right, this was a temporary solution which came
 to be not that temporary.
 And I regret it already...
>>> Well on the bright side, if this were LVM or mdadm linear/concat
>>> array, the whole thing would be toast because any other file system
>>> would have lost too much fs metadata on the missing device.
>>>
>  It
> should be true that it'll tolerate a read only mount indefinitely, but
> read write? Not sure. This sort of edge case isn't well tested at all
> seeing as it required changing the kernel to reduce safe guards. So
> all bets are off the whole thing could become unmountable, not even
> read only, and then it's a scraping job.
 I'm not that crazy, I tried the patch inside a virtual machine on
 virtual drives...
 And since it's only virtual, it may not work on the real partition...
>>> Are you sure the virtual setup lacked a CHUNK_ITEM on the missing
>>> device? That might be what pinned it in that case.
>> In fact in my virtual setup there was more chunk missing (1 metadata 1
>> System and 1 Data).
>> I will try to do a setup closer to my real one.
> Probably the reason why that missing device has no used chunks is
> because it's so small. Btrfs allocates block groups to devices with
> the most unallocated space first. Only once the unallocated space is
> even (approximately) on all devices would it allocate a block group to
> the small device.
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/6] Btrfs: free space tree and sanity test fixes

2016-09-29 Thread Holger Hoffstätte
On 09/29/16 14:21, Anatoly Pugachev wrote:
>> ...
>>
>> This is fixed by patch
>>
>>   "Btrfs: remove unnecessary btrfs_mark_buffer_dirty in split_leaf"
>>
>> that's in the 4.9 queue. Other than that, the self-tests seem to pass,
>> thanks for the test. Would be good if you can test with the mentioned
>> patch included or without integrity checker. Thanks for testing.
> 
> updated git kernel to v4.8-rc8-8-gae6dd8d , applied this 
> "Btrfs: free space tree and sanity test fixes" patchset and added/applied 
> "Btrfs: remove unnecessary btrfs_mark_buffer_dirty in split_leaf" :
> 
(snip)
> Sep 29 00:40:55 ttip kernel: BTRFS: device fsid 
> 7bb81df9-0e2b-47f2-81ff-c08502d38da6 devid 1 transid 5 /dev/loop4
> Sep 29 00:41:30 ttip kernel: BTRFS info (device loop4): disk space caching is 
> enabled
> Sep 29 00:41:30 ttip kernel: BTRFS info (device loop4): has skinny extents
> Sep 29 00:41:30 ttip kernel: BTRFS info (device loop4): flagging fs with big 
> metadata feature
> Sep 29 00:41:30 ttip kernel: BTRFS info (device loop4): creating UUID tree
> Sep 29 00:41:31 ttip kernel: BTRFS: device fsid 
> d0ee7ca3-3be0-465f-857b-19e681181923 devid 1 transid 5 /dev/loop0
> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): enabling free space 
> tree
> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): using free space tree
> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): has skinny extents
> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): flagging fs with big 
> metadata feature
> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): creating free space 
> tree
> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): setting 1 ro feature 
> flag
> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): setting 2 ro feature 
> flag
> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop0): creating UUID tree
> Sep 29 00:41:32 ttip kernel: BTRFS critical (device loop4): corrupt leaf, 
> non-root leaf's nritems is 0: block=29556736,root=1, slot=0
> Sep 29 00:41:32 ttip kernel: BTRFS info (device loop4): leaf 29556736 total 
> ptrs 0 free space 16283
> Sep 29 00:41:32 ttip kernel: BTRFS: assertion failed: 0, file: 
> fs/btrfs/disk-io.c, line: 4059kernel BUG at fs/btrfs/ctree.h:3369!

Try to add https://patchwork.kernel.org/patch/9332707/ aka
"Btrfs: improve check_node to avoid reading corrupted nodes" which should return
-EIO and prevent the BUG().

-h

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/6] Btrfs: free space tree and sanity test fixes

2016-09-29 Thread Anatoly Pugachev
On Mon, Sep 26, 2016 at 07:50:00PM +0200, David Sterba wrote:
> On Sun, Sep 25, 2016 at 10:55:24AM +0300, Anatoly Pugachev wrote:
> > applied patch to git kernel (v4.8-rc7-172-gbd5dbcb) cleanly. Did not used
> > btrfs-progs.git, but debian shipped 4.7.3-1 .
> > 
> > [3184240.135182] BTRFS info (device loop0): creating UUID tree
> > [3184240.252534] BTRFS critical (device loop4): corrupt leaf, non-root 
> > leaf's nritems is 0: block=29556736,root=1, slot=0
> 
> The error does not seem to be related to the free space bitmap issues
> (at least I don't see a connection). The message is from
> 
>   1ba98d086fe3a14d6a31f2f66dbab70c45d00f63
>   "Btrfs: detect corruption when non-root leaf has zero item"
> 
> called from btrfs_mark_buffer_dirty with integrity checker on.
> Confirmed from the log:
> 
> > [3102837.870398] Btrfs loaded, crc32c=crc32c-sparc64, debug=on, assert=on, 
> > integrity-checker=on
> ...
> 
> This is fixed by patch
> 
>   "Btrfs: remove unnecessary btrfs_mark_buffer_dirty in split_leaf"
> 
> that's in the 4.9 queue. Other than that, the self-tests seem to pass,
> thanks for the test. Would be good if you can test with the mentioned
> patch included or without integrity checker. Thanks for testing.


updated git kernel to v4.8-rc8-8-gae6dd8d , applied this 
"Btrfs: free space tree and sanity test fixes" patchset and added/applied 
"Btrfs: remove unnecessary btrfs_mark_buffer_dirty in split_leaf" :


1) kernel config BTRFS integrity-checker=on :

Sep 29 00:32:15 ttip kernel: raid6: using intx1 recovery algorithm
Sep 29 00:32:15 ttip kernel: xor: automatically using best checksumming 
function:
Sep 29 00:32:15 ttip kernel:Niagara   :  4746.000 MB/sec
Sep 29 00:32:15 ttip kernel: Btrfs loaded, crc32c=crc32c-sparc64, debug=on, 
assert=on, integrity-checker=on
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: sectorsize: 8192  nodesize: 8192
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running btrfs free space cache 
tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running extent only tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running bitmap only tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running bitmap and extent tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running space stealing from 
bitmap to extent
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Free space cache tests finished
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running extent buffer operation 
tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running btrfs_split_item tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running extent I/O tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running find delalloc tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running extent buffer bitmap tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Extent I/O tests finished
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running btrfs_get_extent tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running hole first 
btrfs_get_extent test
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running outstanding_extents tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running qgroup tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Qgroup basic add
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Qgroup multiple refs test
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running free space tree tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: sectorsize: 8192  nodesize: 16384
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running btrfs free space cache 
tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running extent only tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running bitmap only tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running bitmap and extent tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running space stealing from 
bitmap to extent
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Free space cache tests finished
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running extent buffer operation 
tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running btrfs_split_item tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running extent I/O tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running find delalloc tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running extent buffer bitmap tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Extent I/O tests finished
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running btrfs_get_extent tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running hole first 
btrfs_get_extent test
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running outstanding_extents tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running qgroup tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Qgroup basic add
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Qgroup multiple refs test
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: Running free space tree tests
Sep 29 00:32:15 ttip kernel: BTRFS: selftest: sectorsize: 8192  nodesize: 32768
Sep 29 00:32:15 ttip kernel: BTRFS: 

Re: [PATCH v3] btrfs-progs: Return more meaningful value for btrfs_read_deve_super

2016-09-29 Thread David Sterba
On Mon, Sep 26, 2016 at 12:54:26PM +0800, Qu Wenruo wrote:
> btrfs_read_dev_super() only returns 0 or -1, which doesn't really help,
> caller won't know if it's caused by bad superblock or superblock out of
> range.
> 
> Return -errno if pread64() return -1, and return -EOF if none or part of
> the super is read out, and return what check_super() returned.
> 
> So caller can get -EIO to catch real corrupted super blocks.
> 
> Signed-off-by: Qu Wenruo 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 3/6] Btrfs: catch invalid free space trees

2016-09-29 Thread David Sterba
On Mon, Sep 26, 2016 at 04:13:29PM -0700, Omar Sandoval wrote:
> > +/*
> > + * Older kernels on big-endian systems produced broken free space tree 
> > bitmaps,
> > + * and btrfs-progs also used to corrupt the free space tree. If this bit is
> > + * clear, then the free space tree cannot be trusted. btrfs-progs can also
> > + * intentionally clear this bit to ask the kernel to rebuild the free space
> > + * tree.
> > + */
> 
> Hm, I think I changed my mind about allowing btrfs-progs to clear the
> bit intentionally. This creates a problem where we have a valid free
> space tree, modify it with btrfs-progs and clear the bit, and then mount
> it on an older kernel that doesn't check for the valid bit. If we really
> wanted to, we could add yet another bit, say, FREE_SPACE_TREE_INVALID,
> which prevents old kernels from mounting it, but I don't want to add
> more hacks just because write support hasn't been implemented in progs
> yet. That doesn't change this patch at all except for the comment here.

What you describe is possible to happen but still rare, the lowest
recommended kernel for general FST feature use will be at least 4.9. We
can describe the buggy kernel/tools combinations and recommended stafety
steps, like clearing the cache manually etc.

> Should I resend it or can this be fixed on applying?

I can update the wording.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PULL] Btrfs, for 4.9, part 1

2016-09-29 Thread David Sterba
Hi,

I'm sending the first batch for 4.9, mostly fixes, stability improvements and
cleanups. There might be a second pull if the patches get reviewed, anything
space handling related or qgroups.
Please pull, thanks.


The following changes since commit 08895a8b6b06ed2323cd97a36ee40a116b3db8ed:

  Linux 4.8-rc8 (2016-09-25 18:47:13 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git misc-4.9

for you to fetch changes up to 196e02490c934398f894e5cb0ee1ac8ad13ca576:

  Btrfs: remove unnecessary btrfs_mark_buffer_dirty in split_leaf (2016-09-26 
19:50:44 +0200)


Anand Jain (1):
  btrfs: fix a possible umount deadlock

Arnd Bergmann (1):
  btrfs: fix btrfs_no_printk stub helper

David Sterba (1):
  btrfs: create example debugfs file only in debugging build

Eric Sandeen (1):
  btrfs: fix perms on demonstration debugfs interface

Goldwyn Rodrigues (3):
  btrfs: Do not reassign count in btrfs_run_delayed_refs
  btrfs: Remove already completed TODO comment
  btrfs: parent_start initialization cleanup

Jeff Mahoney (7):
  btrfs: add dynamic debug support
  btrfs: clean the old superblocks before freeing the device
  btrfs: unsplit printed strings
  btrfs: convert printk(KERN_* to use pr_* calls
  btrfs: convert pr_* to btrfs_* where possible
  btrfs: convert send's verbose_printk to btrfs_debug
  btrfs: btrfs_debug should consume fs_info when DEBUG is not defined

Josef Bacik (5):
  Btrfs: add a flags field to btrfs_fs_info
  Btrfs: kill the start argument to read_extent_buffer_pages
  Btrfs: kill BUG_ON()'s in btrfs_mark_extent_written
  Btrfs: don't leak reloc root nodes on error
  Btrfs: don't BUG() during drop snapshot

Liu Bo (13):
  Btrfs: fix memory leak of block group cache
  Btrfs: remove BUG() in raid56
  Btrfs: fix memory leak in reading btree blocks
  Btrfs: bail out if block group has different mixed flag
  Btrfs: return gracefully from balance if fs tree is corrupted
  Btrfs: memset to avoid stale content in btree node block
  Btrfs: remove BUG_ON in start_transaction
  Btrfs: add error handling for extent buffer in print tree
  Btrfs: improve check_node to avoid reading corrupted nodes
  Btrfs: kill BUG_ON in run_delayed_tree_ref
  Btrfs: fix memory leak in do_walk_down
  Btrfs: memset to avoid stale content in btree leaf
  Btrfs: remove unnecessary btrfs_mark_buffer_dirty in split_leaf

Lu Fengqi (1):
  btrfs: fix check_shared for fiemap ioctl

Luis Henriques (2):
  btrfs: Fix warning "variable ‘blocksize’ set but not used"
  btrfs: Fix warning "variable ‘gen’ set but not used"

Masahiro Yamada (1):
  btrfs: squash lines for simple wrapper functions

Naohiro Aota (1):
  btrfs: let btrfs_delete_unused_bgs() to clean relocated bgs

Qu Wenruo (1):
  btrfs: extend btrfs_set_extent_delalloc and its friends to support 
in-band dedupe and subpage size patchset

 fs/btrfs/backref.c| 409 ++
 fs/btrfs/btrfs_inode.h|  11 --
 fs/btrfs/check-integrity.c| 342 +++
 fs/btrfs/compression.c|   6 +-
 fs/btrfs/ctree.c  |  56 ++
 fs/btrfs/ctree.h  | 116 
 fs/btrfs/delayed-inode.c  |  25 ++-
 fs/btrfs/delayed-ref.c|  15 +-
 fs/btrfs/dev-replace.c|  21 ++-
 fs/btrfs/dir-item.c   |   7 +-
 fs/btrfs/disk-io.c| 237 
 fs/btrfs/disk-io.h|   2 +
 fs/btrfs/extent-tree.c| 200 -
 fs/btrfs/extent_io.c  | 170 +++---
 fs/btrfs/extent_io.h  |   4 +-
 fs/btrfs/file.c   |  43 -
 fs/btrfs/free-space-cache.c   |  21 ++-
 fs/btrfs/free-space-cache.h   |   6 +-
 fs/btrfs/free-space-tree.c|  20 ++-
 fs/btrfs/inode-map.c  |  31 ++--
 fs/btrfs/inode.c  |  70 +---
 fs/btrfs/ioctl.c  |  14 +-
 fs/btrfs/lzo.c|   6 +-
 fs/btrfs/ordered-data.c   |   4 +-
 fs/btrfs/print-tree.c |  93 +-
 fs/btrfs/qgroup.c |  77 
 fs/btrfs/raid56.c |   5 +-
 fs/btrfs/reada.c  |  32 ++--
 fs/btrfs/relocation.c |  47 +++--
 fs/btrfs/root-tree.c  |  18 +-
 fs/btrfs/scrub.c  |  58 +++---
 fs/btrfs/send.c   |  79 
 fs/btrfs/super.c  |  62 ---
 fs/btrfs/sysfs.c  |  19 +-
 fs/btrfs/tests/inode-tests.c  |  12 +-
 fs/btrfs/tests/qgroup-tests.c |   2 +-
 fs/btrfs/transaction.c|  49 +++--
 fs/btrfs/transaction.h|   1 +
 fs/btrfs/tree-log.c   |  12 +-
 fs/btrfs/uuid-tree.c  |  27 +--
 fs/btrfs/volumes.c| 221 

Re: [PATCH] qgroup: Prevent qgroup->reserved from going subzero

2016-09-29 Thread Goldwyn Rodrigues


On 09/29/2016 03:57 AM, Qu Wenruo wrote:
> Thanks for your test script.
> 
> I succeeded in pinning down the problem.
> 
> The problem is, btrfs is invalidate pages that belongs to ordered
> extent(goes through writeback)


No, I don't think invalidated pages are going through writeback. The
problem is that the space for extent allocation is done before the
writeback and if pages are invalidated before writeback, it is not
accounted for.

> 
> The ftrace(with more tracepoints to trace qgroup->reserved change) shows:
> --
> btrfs_qgroup_release_data: root=257, ino=257, start=0, len=4096,
> reserved=4096, op=free
> qgroup_update_reserve: qgid = 257, cur_reserved = 16171008, diff = -4096
> btrfs_qgroup_release_data: root=257, ino=257, start=4096, len=4096,
> reserved=4096, op=free
> qgroup_update_reserve: qgid = 257, cur_reserved = 16166912, diff = -4096
> btrfs_qgroup_release_data: root=257, ino=257, start=8192, len=4096,
> reserved=4096, op=free
> qgroup_update_reserve: qgid = 257, cur_reserved = 16162816, diff = -4096
> btrfs_qgroup_release_data: root=257, ino=257, start=12288, len=4096,
> reserved=4096, op=free
> qgroup_update_reserve: qgid = 257, cur_reserved = 16158720, diff = -4096
> btrfs_qgroup_release_data: root=257, ino=257, start=16384, len=4096,
> reserved=4096, op=free
> qgroup_update_reserve: qgid = 257, cur_reserved = 16154624, diff = -4096

This is a little confusing. There is no qgroup_update_reserve() function
in the source. Where did you add this?

> 
> 
> ^ Btrfs invalidates 5 pages of ino 257 qg 257, range 0 ~ 20K.
> 
> btrfs_qgroup_release_data: root=257, ino=257, start=0, len=5242880,
> reserved=5222400, op=release
> 
> ^ Here ordered_io finishes, write the whole 5M data, while
> QGROUP_RESERVED only returned 5M - 20K.
> --
> 
> The problem is at btrfs_add_delayed_data_ref(), we use the assumption
> that, qgroup reserved space is the same as extent size(5M)
> But in fact, btrfs_qgroup_release_data() only release (5M - 20K).
> Leaking the 20K.

Yes, this is more like it. However, I would put it the other way. It
releases 5M when it should have released 5M-20K, because 20k was
released when invalidating page.

> 
> Calltrace:
> btrfs_finish_ordered_io()
> |- inserted_reserved_file_extent()
>|- btrfs_alloc_reserved_file_extent()
>|  ^^ Here we tell delayed_ref to free 5M later
>|- btrfs_qgroup_release_data()
>   ^^ We only released (5M - 20K), as the first 5 pages
>  are freed by btrfs_invalidatepage()
> 
> If btrfs is designed to freeing pages which belongs to ordered_extent,
> then it totally breaks the qgroup assumption.
> 
> Then we have several ways to fix it:
> 1) Fix race between ordered extent(writeback) with invalidatepage()
>Make btrfs_invalidatepage() skips pages that are already in
>ordered_extent, then we're OK.
> 
>This is much like your original fix, but I'm not sure if DIRTY page
>flag is bond to ordered_extent.
>And more comment will help.


So, what you mean is that fix is correct, I just need to reword the
patch header.

> 
> 2) Make btrfs_qgroup_release_data() return accurate num bytes
>And then pass it to delayed_ref head.
> 
>This is quite anti-intuition. If we're adding a new extent, how
>could it happen that some pages are already invalidated?
> 

I agree. It is counter-intutive and will increase complexity.

> Anyway, awesome work, exposed a quite strange race and makes us re-think
> the base of qgroup reserve space framework.

Thanks.

-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS: space_info 4 has 18446742286429913088 free, is not full

2016-09-29 Thread Stefan Priebe - Profihost AG
Hi,

Am 29.09.2016 um 12:03 schrieb Adam Borowski:
> On Thu, Sep 29, 2016 at 09:27:01AM +0200, Stefan Priebe - Profihost AG wrote:
>> Am 29.09.2016 um 09:13 schrieb Wang Xiaoguang:
> I found that compress sometime report ENOSPC error even in 4.8-rc8,
> currently
 I cannot confirm that as i do not have anough space to test this without
 compression ;-( But yes i've compression enabled.
>>> I might not get you, my poor english :)
>>> You mean that you only get ENOSPC error when compression is enabled?
>>>
>>> And when compression is not enabled, you do not get ENOSPC error?
>>
>> I can't tell you. I cannot test with compression not enabled. I do not
>> have anough free space on this disk.
> 
> Disabling compression doesn't immediately require any space -- it affects
> only newly written data.  What you already have remains in the old
> compression setting, unless you defrag everything (a side effect of
> defragging is switching existing extents to the new compression mode).

Yes i know that but most workload is creating reflinks to old files and
modify data in them. So to create a good test i need to defrag and
uncompress all those files.

Greets,
Stefan
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS: space_info 4 has 18446742286429913088 free, is not full

2016-09-29 Thread Adam Borowski
On Thu, Sep 29, 2016 at 09:27:01AM +0200, Stefan Priebe - Profihost AG wrote:
> Am 29.09.2016 um 09:13 schrieb Wang Xiaoguang:
> >>> I found that compress sometime report ENOSPC error even in 4.8-rc8,
> >>> currently
> >> I cannot confirm that as i do not have anough space to test this without
> >> compression ;-( But yes i've compression enabled.
> > I might not get you, my poor english :)
> > You mean that you only get ENOSPC error when compression is enabled?
> > 
> > And when compression is not enabled, you do not get ENOSPC error?
> 
> I can't tell you. I cannot test with compression not enabled. I do not
> have anough free space on this disk.

Disabling compression doesn't immediately require any space -- it affects
only newly written data.  What you already have remains in the old
compression setting, unless you defrag everything (a side effect of
defragging is switching existing extents to the new compression mode).

-- 
A MAP07 (Dead Simple) raspberry tincture recipe: 0.5l 95% alcohol, 1kg
raspberries, 0.4kg sugar; put into a big jar for 1 month.  Filter out and
throw away the fruits (can dump them into a cake, etc), let the drink age
at least 3-6 months.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] qgroup: Prevent qgroup->reserved from going subzero

2016-09-29 Thread Qu Wenruo

Thanks for your test script.

I succeeded in pinning down the problem.

The problem is, btrfs is invalidate pages that belongs to ordered 
extent(goes through writeback)


The ftrace(with more tracepoints to trace qgroup->reserved change) shows:
--
btrfs_qgroup_release_data: root=257, ino=257, start=0, len=4096, 
reserved=4096, op=free

qgroup_update_reserve: qgid = 257, cur_reserved = 16171008, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=4096, len=4096, 
reserved=4096, op=free

qgroup_update_reserve: qgid = 257, cur_reserved = 16166912, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=8192, len=4096, 
reserved=4096, op=free

qgroup_update_reserve: qgid = 257, cur_reserved = 16162816, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=12288, len=4096, 
reserved=4096, op=free

qgroup_update_reserve: qgid = 257, cur_reserved = 16158720, diff = -4096
btrfs_qgroup_release_data: root=257, ino=257, start=16384, len=4096, 
reserved=4096, op=free

qgroup_update_reserve: qgid = 257, cur_reserved = 16154624, diff = -4096


^ Btrfs invalidates 5 pages of ino 257 qg 257, range 0 ~ 20K.

btrfs_qgroup_release_data: root=257, ino=257, start=0, len=5242880, 
reserved=5222400, op=release


^ Here ordered_io finishes, write the whole 5M data, while 
QGROUP_RESERVED only returned 5M - 20K.

--

The problem is at btrfs_add_delayed_data_ref(), we use the assumption 
that, qgroup reserved space is the same as extent size(5M)

But in fact, btrfs_qgroup_release_data() only release (5M - 20K).
Leaking the 20K.

Calltrace:
btrfs_finish_ordered_io()
|- inserted_reserved_file_extent()
   |- btrfs_alloc_reserved_file_extent()
   |  ^^ Here we tell delayed_ref to free 5M later
   |- btrfs_qgroup_release_data()
  ^^ We only released (5M - 20K), as the first 5 pages
 are freed by btrfs_invalidatepage()

If btrfs is designed to freeing pages which belongs to ordered_extent, 
then it totally breaks the qgroup assumption.


Then we have several ways to fix it:
1) Fix race between ordered extent(writeback) with invalidatepage()
   Make btrfs_invalidatepage() skips pages that are already in
   ordered_extent, then we're OK.

   This is much like your original fix, but I'm not sure if DIRTY page
   flag is bond to ordered_extent.
   And more comment will help.

2) Make btrfs_qgroup_release_data() return accurate num bytes
   And then pass it to delayed_ref head.

   This is quite anti-intuition. If we're adding a new extent, how
   could it happen that some pages are already invalidated?


Anyway, awesome work, exposed a quite strange race and makes us re-think 
the base of qgroup reserve space framework.


Thanks,
Qu

At 09/28/2016 10:19 AM, Goldwyn Rodrigues wrote:



On 09/27/2016 08:44 PM, Qu Wenruo wrote:




Finally reproduced it.

Although in a low chance, about 1/10.

Under most case, the final remove gets executed without error.


Change 50m to 500m while setting the qgroup limit, the probability will
increase.




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS: space_info 4 has 18446742286429913088 free, is not full

2016-09-29 Thread Stefan Priebe - Profihost AG
Am 29.09.2016 um 09:13 schrieb Wang Xiaoguang:
>>> I found that compress sometime report ENOSPC error even in 4.8-rc8,
>>> currently
>> I cannot confirm that as i do not have anough space to test this without
>> compression ;-( But yes i've compression enabled.
> I might not get you, my poor english :)
> You mean that you only get ENOSPC error when compression is enabled?
> 
> And when compression is not enabled, you do not get ENOSPC error?

I can't tell you. I cannot test with compression not enabled. I do not
have anough free space on this disk.

>>> I'm trying to fix it.
>> That sounds good but do you also get the
>> BTRFS: space_info 4 has 18446742286429913088 free, is not full
>>
>> kernel messages on umount? if not you might have found another problem.
> Yes, I seem similar messages, you can paste you whole dmesg info here.

[ cut here ]
WARNING: CPU: 2 PID: 5187 at fs/btrfs/extent-tree.c:5790
btrfs_free_block_groups+0x346/0x430 [btrfs]()
Modules linked in: netconsole xt_multiport iptable_filter ip_tables
x_tables 8021q garp bonding x86_pkg_temp_thermal coretemp kvm_intel kvm
irqbypass sb_edac crc32_pclmul edac_core i2c_i801 i40e(O) vxlan
ip6_udp_tunnel udp_tunnel shpchp ipmi_si ipmi_msghandler button loop
btrfs dm_mod raid10 raid0 multipath linear raid456 async_raid6_recov
async_memcpy async_pq async_xor async_tx xor raid6_pq igb i2c_algo_bit
i2c_core usbhid raid1 md_mod xhci_pci sg ehci_pci xhci_hcd ehci_hcd
sd_mod ahci usbcore ptp libahci usb_common pps_core aacraid
CPU: 2 PID: 5187 Comm: umount Tainted: G O 4.4.22+63-ph #1
Hardware name: Supermicro Super Server/X10SRi-F, BIOS 2.0 12/17/2015
 880fda777d00 813b69c3 
c067a099 880fda777d38 810821c6 
880074bf0a00 88103c10c088 88103c10c000 88103c10c098
Call Trace:
[] dump_stack+0x63/0x90
[] warn_slowpath_common+0x86/0xc0
[] warn_slowpath_null+0x1a/0x20
[] btrfs_free_block_groups+0x346/0x430 [btrfs]
[] close_ctree+0x15d/0x330 [btrfs]
[] btrfs_put_super+0x19/0x20 [btrfs]
[] generic_shutdown_super+0x6f/0x100
[] kill_anon_super+0x12/0x20
[] btrfs_kill_super+0x16/0xa0 [btrfs]
[] deactivate_locked_super+0x43/0x70
[] deactivate_super+0x5c/0x60
[] cleanup_mnt+0x3f/0x90
[] __cleanup_mnt+0x12/0x20
[] task_work_run+0x81/0xa0
[] exit_to_usermode_loop+0xb0/0xc0
[] syscall_return_slowpath+0xd4/0x130
[] int_ret_from_sys_call+0x25/0x8f
---[ end trace cee6ace13018e13e ]---
[ cut here ]
WARNING: CPU: 2 PID: 5187 at fs/btrfs/extent-tree.c:5791
btrfs_free_block_groups+0x365/0x430 [btrfs]()
Modules linked in: netconsole xt_multiport iptable_filter ip_tables
x_tables 8021q garp bonding x86_pkg_temp_thermal coretemp kvm_intel kvm
irqbypass sb_edac crc32_pclmul edac_core i2c_i801 i40e(O) vxlan
ip6_udp_tunnel udp_tunnel shpchp ipmi_si ipmi_msghandler button loop
btrfs dm_mod raid10 raid0 multipath linear raid456 async_raid6_recov
async_memcpy async_pq async_xor async_tx xor raid6_pq igb i2c_algo_bit
i2c_core usbhid raid1 md_mod xhci_pci sg ehci_pci xhci_hcd ehci_hcd
sd_mod ahci usbcore ptp libahci usb_common pps_core aacraid
CPU: 2 PID: 5187 Comm: umount Tainted: G W O 4.4.22+63-ph #1
Hardware name: Supermicro Super Server/X10SRi-F, BIOS 2.0 12/17/2015
 880fda777d00 813b69c3 
c067a099 880fda777d38 810821c6 
880074bf0a00 88103c10c088 88103c10c000 88103c10c098
Call Trace:
[] dump_stack+0x63/0x90
[] warn_slowpath_common+0x86/0xc0
[] warn_slowpath_null+0x1a/0x20
[] btrfs_free_block_groups+0x365/0x430 [btrfs]
[] close_ctree+0x15d/0x330 [btrfs]
[] btrfs_put_super+0x19/0x20 [btrfs]
[] generic_shutdown_super+0x6f/0x100
[] kill_anon_super+0x12/0x20
[] btrfs_kill_super+0x16/0xa0 [btrfs]
[] deactivate_locked_super+0x43/0x70
[] deactivate_super+0x5c/0x60
[] cleanup_mnt+0x3f/0x90
[] __cleanup_mnt+0x12/0x20
[] task_work_run+0x81/0xa0
[] exit_to_usermode_loop+0xb0/0xc0
[] syscall_return_slowpath+0xd4/0x130
[] int_ret_from_sys_call+0x25/0x8f
---[ end trace cee6ace13018e13f ]---
[ cut here ]
WARNING: CPU: 2 PID: 5187 at fs/btrfs/extent-tree.c:10151
btrfs_free_block_groups+0x291/0x430 [btrfs]()
Modules linked in: netconsole xt_multiport iptable_filter ip_tables
x_tables 8021q garp bonding x86_pkg_temp_thermal coretemp kvm_intel kvm
irqbypass sb_edac crc32_pclmul edac_core i2c_i801 i40e(O) vxlan
ip6_udp_tunnel udp_tunnel shpchp ipmi_si ipmi_msghandler button loop
btrfs dm_mod raid10 raid0 multipath linear raid456 async_raid6_recov
async_memcpy async_pq async_xor async_tx xor raid6_pq igb i2c_algo_bit
i2c_core usbhid raid1 md_mod xhci_pci sg ehci_pci xhci_hcd ehci_hcd
sd_mod ahci usbcore ptp libahci usb_common pps_core aacraid
CPU: 2 PID: 5187 Comm: umount Tainted: G W O 4.4.22+63-ph #1
Hardware name: Supermicro Super Server/X10SRi-F, BIOS 2.0 12/17/2015
 880fda777d00 813b69c3 

Re: BTRFS: space_info 4 has 18446742286429913088 free, is not full

2016-09-29 Thread Wang Xiaoguang

Hi,

On 09/29/2016 03:09 PM, Stefan Priebe - Profihost AG wrote:

Am 29.09.2016 um 08:55 schrieb Wang Xiaoguang:

Hi,

On 09/29/2016 02:49 PM, Stefan Priebe - Profihost AG wrote:

Hi,

Am 28.09.2016 um 14:10 schrieb Wang Xiaoguang:

OK, I see.
But given that you often run into enospc errors, can you work out a
reproduce
script according to you work load. That will give us great help.

You got ENOSPC errors only when you have compress enabled?

I found that compress sometime report ENOSPC error even in 4.8-rc8,
currently

I cannot confirm that as i do not have anough space to test this without
compression ;-( But yes i've compression enabled.

I might not get you, my poor english :)
You mean that you only get ENOSPC error when compression is enabled?

And when compression is not enabled, you do not get ENOSPC error?



I'm trying to fix it.

That sounds good but do you also get the
BTRFS: space_info 4 has 18446742286429913088 free, is not full

kernel messages on umount? if not you might have found another problem.

Yes, I seem similar messages, you can paste you whole dmesg info here.

Regards,
Xiaoguang Wang



Stefan


Regards,
Xiaoguang Wang


I tried hard to reproduce it but i can't get it to reproduce with a test
script. Any ideas?

Stefan


Reagrds,
Xiaoguang Wang


Greets,
Stefan


Regards,
Xiaoguang Wang

Greets,
Stefan
--
To unsubscribe from this list: send the line "unsubscribe
linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html













--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS: space_info 4 has 18446742286429913088 free, is not full

2016-09-29 Thread Stefan Priebe - Profihost AG

Am 29.09.2016 um 08:55 schrieb Wang Xiaoguang:
> Hi,
> 
> On 09/29/2016 02:49 PM, Stefan Priebe - Profihost AG wrote:
>> Hi,
>>
>> Am 28.09.2016 um 14:10 schrieb Wang Xiaoguang:
>>> OK, I see.
>>> But given that you often run into enospc errors, can you work out a
>>> reproduce
>>> script according to you work load. That will give us great help.
> You got ENOSPC errors only when you have compress enabled?
> 
> I found that compress sometime report ENOSPC error even in 4.8-rc8,
> currently

I cannot confirm that as i do not have anough space to test this without
compression ;-( But yes i've compression enabled.

> I'm trying to fix it.

That sounds good but do you also get the
BTRFS: space_info 4 has 18446742286429913088 free, is not full

kernel messages on umount? if not you might have found another problem.

Stefan

> 
> Regards,
> Xiaoguang Wang
> 
>> I tried hard to reproduce it but i can't get it to reproduce with a test
>> script. Any ideas?
>>
>> Stefan
>>
>>> Reagrds,
>>> Xiaoguang Wang
>>>
 Greets,
 Stefan

> Regards,
> Xiaoguang Wang
>> Greets,
>> Stefan
>> -- 
>> To unsubscribe from this list: send the line "unsubscribe
>> linux-btrfs" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>>
>
>>>
>>>
>>
> 
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS: space_info 4 has 18446742286429913088 free, is not full

2016-09-29 Thread Wang Xiaoguang

Hi,

On 09/29/2016 02:49 PM, Stefan Priebe - Profihost AG wrote:

Hi,

Am 28.09.2016 um 14:10 schrieb Wang Xiaoguang:

OK, I see.
But given that you often run into enospc errors, can you work out a
reproduce
script according to you work load. That will give us great help.

You got ENOSPC errors only when you have compress enabled?

I found that compress sometime report ENOSPC error even in 4.8-rc8, 
currently

I'm trying to fix it.

Regards,
Xiaoguang Wang


I tried hard to reproduce it but i can't get it to reproduce with a test
script. Any ideas?

Stefan


Reagrds,
Xiaoguang Wang


Greets,
Stefan


Regards,
Xiaoguang Wang

Greets,
Stefan
--
To unsubscribe from this list: send the line "unsubscribe
linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html













--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS: space_info 4 has 18446742286429913088 free, is not full

2016-09-29 Thread Stefan Priebe - Profihost AG
Hi,

Am 28.09.2016 um 14:10 schrieb Wang Xiaoguang:
> OK, I see.
> But given that you often run into enospc errors, can you work out a
> reproduce
> script according to you work load. That will give us great help.

I tried hard to reproduce it but i can't get it to reproduce with a test
script. Any ideas?

Stefan

> 
> Reagrds,
> Xiaoguang Wang
> 
>>
>> Greets,
>> Stefan
>>
>>> Regards,
>>> Xiaoguang Wang
 Greets,
 Stefan
 -- 
 To unsubscribe from this list: send the line "unsubscribe
 linux-btrfs" in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html


>>>
>>>
>>
> 
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html