[PATCH] Btrfs: merge pending IO for tree log write back

2013-05-28 Thread Miao Xie
Before applying this patch, we first flushed the log tree of the fs/file
tree, and then flushed the log root tree. This is inefficient,
especially on hard disks. This patch improves the situation by wrapping
the above two flushes in the same blk_plug.

In testing, the performance of sync writes went up ~60% (2.9MB/s -> 4.6MB/s)
on my SCSI disk, whose disk buffer was enabled.

Test step:
 # mkfs.btrfs -f -m single disk
 # mount disk mnt
 # dd if=/dev/zero of=mnt/file0 bs=32K count=1024 oflag=sync

Signed-off-by: Miao Xie mi...@cn.fujitsu.com
---
 fs/btrfs/transaction.c |  6 +++---
 fs/btrfs/tree-log.c| 17 ++---
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0544587..ce51603 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -736,9 +736,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
struct extent_state *cached_state = NULL;
u64 start = 0;
u64 end;
-   struct blk_plug plug;
 
-   blk_start_plug(plug);
while (!find_first_extent_bit(dirty_pages, start, start, end,
  mark, cached_state)) {
convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
@@ -752,7 +750,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
}
if (err)
werr = err;
-   blk_finish_plug(plug);
return werr;
 }
 
@@ -797,8 +794,11 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root 
*root,
 {
int ret;
int ret2;
+   struct blk_plug plug;
 
+   blk_start_plug(plug);
ret = btrfs_write_marked_extents(root, dirty_pages, mark);
+   blk_finish_plug(plug);
ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
 
if (ret)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c276ac9..209d789 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -18,6 +18,7 @@
 
 #include linux/sched.h
 #include linux/slab.h
+#include linux/blkdev.h
 #include linux/list_sort.h
 #include ctree.h
 #include transaction.h
@@ -2358,6 +2359,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_root *log = root-log_root;
struct btrfs_root *log_root_tree = root-fs_info-log_root_tree;
unsigned long log_transid = 0;
+   struct blk_plug plug;
 
mutex_lock(root-log_mutex);
log_transid = root-log_transid;
@@ -2401,8 +2403,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
/* we start IO on  all the marked extents here, but we don't actually
 * wait for them until later.
 */
+   blk_start_plug(plug);
ret = btrfs_write_marked_extents(log, log-dirty_log_pages, mark);
if (ret) {
+   blk_finish_plug(plug);
btrfs_abort_transaction(trans, root, ret);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(root-log_mutex);
@@ -2437,6 +2441,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}
 
if (ret) {
+   blk_finish_plug(plug);
if (ret != -ENOSPC) {
btrfs_abort_transaction(trans, root, ret);
mutex_unlock(log_root_tree-log_mutex);
@@ -2452,6 +2457,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
index2 = log_root_tree-log_transid % 2;
if (atomic_read(log_root_tree-log_commit[index2])) {
+   blk_finish_plug(plug);
btrfs_wait_marked_extents(log, log-dirty_log_pages, mark);
wait_log_commit(trans, log_root_tree,
log_root_tree-log_transid);
@@ -2474,6 +2480,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 * check the full commit flag again
 */
if (root-fs_info-last_trans_log_full_commit == trans-transid) {
+   blk_finish_plug(plug);
btrfs_wait_marked_extents(log, log-dirty_log_pages, mark);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(log_root_tree-log_mutex);
@@ -2481,9 +2488,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out_wake_log_root;
}
 
-   ret = btrfs_write_and_wait_marked_extents(log_root_tree,
-   log_root_tree-dirty_log_pages,
-   EXTENT_DIRTY | EXTENT_NEW);
+   ret = btrfs_write_marked_extents(log_root_tree,
+log_root_tree-dirty_log_pages,
+EXTENT_DIRTY | EXTENT_NEW);
+   blk_finish_plug(plug);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
btrfs_free_logged_extents(log, log_transid);
@@ -2491,6 +2499,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out_wake_log_root;
}

[no subject]

2013-05-28 Thread Alex Lyakas
Hello all,
I have the following unresponsive btrfs:

btrfs_end_transaction() is called and is stuck in btrfs_tree_lock():

May 27 16:13:55 vc kernel: [ 7130.421159] kworker/u:85D
 0 19859  2 0x
May 27 16:13:55 vc kernel: [ 7130.421159]  880095335568
0046 00010093cb38 880083b11b48
May 27 16:13:55 vc kernel: [ 7130.421159]  880095335fd8
880095335fd8 880095335fd8 00013f40
May 27 16:13:55 vc kernel: [ 7130.421159]  8800a1fddd00
88008b1fc5c0 880095335578 880090f736d8
May 27 16:13:55 vc kernel: [ 7130.421159] Call Trace:
May 27 16:13:55 vc kernel: [ 7130.421159]  [816eb399]
schedule+0x29/0x70
May 27 16:13:55 vc kernel: [ 7130.421159]  [a03665ad]
btrfs_tree_lock+0xcd/0x250 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [8107fcc0] ?
add_wait_queue+0x60/0x60
May 27 16:13:55 vc kernel: [ 7130.421159]  [a031d558]
btrfs_init_new_buffer+0x68/0x140 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a031d70d]
btrfs_alloc_free_block+0xdd/0x460 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [8113ff9b] ?
__set_page_dirty_nobuffers+0x1b/0x20
May 27 16:13:55 vc kernel: [ 7130.421159]  [a0327b2e] ?
btree_set_page_dirty+0xe/0x10 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a0307756]
__btrfs_cow_block+0x126/0x4f0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a0307cc3]
btrfs_cow_block+0x123/0x1d0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a030c281]
btrfs_search_slot+0x381/0x820 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a03138ce]
lookup_inline_extent_backref+0x8e/0x5b0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a032b6e9] ?
btrfs_mark_buffer_dirty+0x99/0xf0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a031301e] ?
setup_inline_extent_backref+0x18e/0x290 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a0313e53]
insert_inline_extent_backref+0x63/0x130 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a030677a] ?
btrfs_alloc_path+0x1a/0x20 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a031486f]
__btrfs_inc_extent_ref+0x9f/0x240 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a0377aa9] ?
btrfs_merge_delayed_refs+0x289/0x300 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a031b3a1]
run_clustered_refs+0x971/0xd00 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a030714d] ?
btrfs_put_tree_mod_seq+0x10d/0x150 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a031f7f0]
btrfs_run_delayed_refs+0xd0/0x320 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a0330bf7]
__btrfs_end_transaction+0xf7/0x410 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [a0330f60]
btrfs_end_transaction+0x10/0x20 [btrfs]

As a result, the transaction cannot commit; it waits in the do-while loop
for all writers to detach.

May 27 16:13:55 vc kernel: [ 7130.419009] btrfs-transacti D
 0 15150  2 0x
May 27 16:13:55 vc kernel: [ 7130.419012]  88009f86bce8
0046 032d032d 
May 27 16:13:55 vc kernel: [ 7130.419016]  88009f86bfd8
88009f86bfd8 88009f86bfd8 00013f40
May 27 16:13:55 vc kernel: [ 7130.419020]  8800af1e9740
8800a03f8000 0090 88009693cb00
May 27 16:13:55 vc kernel: [ 7130.419023] Call Trace:
May 27 16:13:55 vc kernel: [ 7130.419027]  [816eb399]
schedule+0x29/0x70
May 27 16:13:55 vc kernel: [ 7130.419031]  [816e9b1d]
schedule_timeout+0x1ed/0x250
May 27 16:13:55 vc kernel: [ 7130.419055]  [a03497a3] ?
btrfs_run_ordered_operations+0x2b3/0x2e0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.419060]  [81045cd9] ?
default_spin_lock_flags+0x9/0x10
May 27 16:13:55 vc kernel: [ 7130.419081]  [a0330388]
btrfs_commit_transaction+0x3b8/0xae0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.419085]  [8107fcc0] ?
add_wait_queue+0x60/0x60
May 27 16:13:55 vc kernel: [ 7130.419104]  [a0328525]
transaction_kthread+0x1b5/0x230 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.419124]  [a0328370] ?
btree_invalidatepage+0x80/0x80 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.419128]  [8107f0d0]
kthread+0xc0/0xd0
May 27 16:13:55 vc kernel: [ 7130.419132]  [8107f010] ?
flush_kthread_worker+0xb0/0xb0
May 27 16:13:55 vc kernel: [ 7130.419136]  [816f506c]
ret_from_fork+0x7c/0xb0
May 27 16:13:55 vc kernel: [ 7130.419140]  [8107f010] ?
flush_kthread_worker+0xb0/0xb0

There is an additional thread stuck in btrfs_tree_lock(); I am not sure how
it is related. Perhaps there is some deadlock between the two?

May 27 16:13:55 vc kernel: [ 7130.421159] flush-btrfs-2   D
0001 0 18816  2 0x
May 27 16:13:55 vc kernel: [ 7130.421159]  88008b553948
0046 880017991050 
May 27 16:13:55 vc kernel: [ 7130.421159]  

Re: nocow 'C' flag ignored after balance

2013-05-28 Thread Kyle Gates

From: Liu Bo bo.li@oracle.com

Subject: [PATCH] Btrfs: fix broken nocow after a normal balance

Balance will create reloc_root for each fs root, and it's going to
record last_snapshot to filter shared blocks.  The side effect of
setting last_snapshot is to break nocow attributes of files.

So here we update file extent's generation while walking relocated
file extents in data reloc root, and use file extent's generation
instead for checking if we have cross refs for the file extent.

That way we can make nocow happy again and have no impact on others.

Reported-by: Kyle Gates kylega...@hotmail.com
Signed-off-by: Liu Bo bo.li@oracle.com
---
fs/btrfs/ctree.h   |2 +-
fs/btrfs/extent-tree.c |   18 +-
fs/btrfs/inode.c   |   10 --
fs/btrfs/relocation.c  |1 +
4 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4560052..eb2e782 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3090,7 +3090,7 @@ int btrfs_pin_extent_for_log_replay(struct 
btrfs_root *root,

 u64 bytenr, u64 num_bytes);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
-   u64 objectid, u64 offset, u64 bytenr);
+   u64 objectid, u64 offset, u64 bytenr, u64 gen);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
 struct btrfs_fs_info *info,
 u64 bytenr);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1e84c74..f3b3616 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2816,7 +2816,8 @@ out:
static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
 struct btrfs_root *root,
 struct btrfs_path *path,
- u64 objectid, u64 offset, u64 bytenr)
+ u64 objectid, u64 offset, u64 bytenr,
+ u64 fi_gen)
{
 struct btrfs_root *extent_root = root-fs_info-extent_root;
 struct extent_buffer *leaf;
@@ -2861,8 +2862,15 @@ static noinline int check_committed_ref(struct 
btrfs_trans_handle

*trans,
 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
 goto out;

- if (btrfs_extent_generation(leaf, ei) =
- btrfs_root_last_snapshot(root-root_item))
+ /*
+ * Usually generation in extent item is larger than that in file extent
+ * item because of delay refs.  But we don't want balance to break
+ * file's nocow behaviour, so use file_extent's generation which has
+ * been updated when we update fs root to point to relocated file
+ * extents in data reloc root.
+ */
+ fi_gen = max_t(u64, btrfs_extent_generation(leaf, ei), fi_gen);
+ if (fi_gen = btrfs_root_last_snapshot(root-root_item))
 goto out;

 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
@@ -2886,7 +2894,7 @@ out:

int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
-   u64 objectid, u64 offset, u64 bytenr)
+   u64 objectid, u64 offset, u64 bytenr, u64 gen)
{
 struct btrfs_path *path;
 int ret;
@@ -2898,7 +2906,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle 
*trans,


 do {
 ret = check_committed_ref(trans, root, path, objectid,
-   offset, bytenr);
+   offset, bytenr, gen);
 if (ret  ret != -ENOENT)
 goto out;

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2cfdd33..976b045 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1727,6 +1727,8 @@ next_slot:
 ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
 if (extent_type == BTRFS_FILE_EXTENT_REG ||
 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ u64 gen;
+ gen = btrfs_file_extent_generation(leaf, fi);
 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 extent_offset = btrfs_file_extent_offset(leaf, fi);
 extent_end = found_key.offset +
@@ -1749,7 +1751,8 @@ next_slot:
 goto out_check;
 if (btrfs_cross_ref_exist(trans, root, ino,
   found_key.offset -
-   extent_offset, disk_bytenr))
+   extent_offset, disk_bytenr,
+   gen))
 goto out_check;
 disk_bytenr += extent_offset;
 disk_bytenr += cur_offset - found_key.offset;
@@ -7002,6 +7005,7 @@ static noinline int can_nocow_odirect(struct 
btrfs_trans_handle

*trans,
 struct btrfs_key key;
 u64 disk_bytenr;
 u64 backref_offset;
+ u64 fi_gen;
 u64 extent_end;
 u64 num_bytes;
 int slot;
@@ -7048,6 +7052,7 @@ static noinline int can_nocow_odirect(struct 
btrfs_trans_handle

*trans,
 }
 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 backref_offset = btrfs_file_extent_offset(leaf, fi);
+ fi_gen = btrfs_file_extent_generation(leaf, fi);

 *orig_start = key.offset - backref_offset;
 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
@@ -7067,7 +7072,8 @@ static noinline int can_nocow_odirect(struct 
btrfs_trans_handle

*trans,
 * find any we must cow
 */
 if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
-   key.offset - backref_offset, disk_bytenr))
+   key.offset - backref_offset, disk_bytenr,
+   fi_gen))
 goto out;

 /*
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 704a1b8..07faabf 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1637,6 +1637,7 @@ int 

[PATCH 0/3] Btrfs: qgroup rescan fixes for next rc

2013-05-28 Thread Jan Schmidt
Here are three fixes for the new qgroup rescan feature. The first two
are quite small, the third one is a little bigger. I thought about
splitting that one up, but in the end I didn't find a good point to
break it up. It achieves more than one goal, I agree, but it's more or
less a compact code change that need not be split artificially in my
opinion.

Jan Schmidt (3):
  Btrfs: fix memory patcher through fs_info-qgroup_ulist
  Btrfs: avoid double free of fs_info-qgroup_ulist
  Btrfs: fix qgroup rescan resume on mount

 fs/btrfs/ctree.h   |2 +
 fs/btrfs/disk-io.c |2 +
 fs/btrfs/qgroup.c  |  198 +---
 3 files changed, 131 insertions(+), 71 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] Btrfs: fix memory patcher through fs_info-qgroup_ulist

2013-05-28 Thread Jan Schmidt
Commit 5b7c665e introduced fs_info->qgroup_ulist, which is allocated during
btrfs_read_qgroup_config and meant to be used later by the qgroup accounting
code. However, it is always freed before btrfs_read_qgroup_config returns,
because the commit mentioned above adds a check for (ret), where a check
for (ret < 0) would have been the right choice. This commit fixes the check.

Cc: Wang Shilong wangsl-f...@cn.fujitsu.com
Signed-off-by: Jan Schmidt list.bt...@jan-o-sch.net
---
 fs/btrfs/qgroup.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d059d86..74b432d 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -430,7 +430,7 @@ out:
}
btrfs_free_path(path);
 
-   if (ret)
+   if (ret  0)
ulist_free(fs_info-qgroup_ulist);
 
return ret  0 ? ret : 0;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] Btrfs: fix qgroup rescan resume on mount

2013-05-28 Thread Jan Schmidt
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restructures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.

First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronization problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.

Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable

Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]

qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.

We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.

As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.

Signed-off-by: Jan Schmidt list.bt...@jan-o-sch.net
---
 fs/btrfs/ctree.h   |2 +
 fs/btrfs/disk-io.c |2 +
 fs/btrfs/qgroup.c  |  190 +---
 3 files changed, 125 insertions(+), 69 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fd62aa8..8ac8d52 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1610,6 +1610,7 @@ struct btrfs_fs_info {
struct btrfs_key qgroup_rescan_progress;
struct btrfs_workers qgroup_rescan_workers;
struct completion qgroup_rescan_completion;
+   struct btrfs_work qgroup_rescan_work;
 
/* filesystem state */
unsigned long fs_state;
@@ -3856,6 +3857,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
+void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
  struct btrfs_fs_info *fs_info, u64 src, u64 dst);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d7b46c6..da4a10c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2879,6 +2879,8 @@ retry_root_backup:
return ret;
}
 
+   btrfs_qgroup_rescan_resume(fs_info);
+
return 0;
 
 fail_qgroup:
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c6ce642..1280eff 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -98,13 +98,10 @@ struct btrfs_qgroup_list {
struct btrfs_qgroup *member;
 };
 
-struct qgroup_rescan {
-   struct btrfs_work   work;
-   struct btrfs_fs_info*fs_info;
-};
-
-static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
-   struct qgroup_rescan *qscan);
+static int
+qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
+  int init_flags);
+static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
 
 /* must be called with qgroup_ioctl_lock held */
 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
@@ -255,6 +252,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
int slot;
int ret = 0;
u64 flags = 0;
+   u64 rescan_progress = 0;
 
if (!fs_info-quota_enabled)
return 0;
@@ -312,20 +310,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
}
fs_info-qgroup_flags = btrfs_qgroup_status_flags(l,
  ptr);
-   fs_info-qgroup_rescan_progress.objectid =
-   btrfs_qgroup_status_rescan(l, ptr);
-   if (fs_info-qgroup_flags 
-   BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
-   struct qgroup_rescan *qscan =
-   kmalloc(sizeof(*qscan), GFP_NOFS);
-   if (!qscan) {
-   ret = -ENOMEM;
-   goto out;
-   }
-   fs_info-qgroup_rescan_progress.type = 0;
-   fs_info-qgroup_rescan_progress.offset = 0;
- 

[PATCH 2/3] Btrfs: avoid double free of fs_info-qgroup_ulist

2013-05-28 Thread Jan Schmidt
When btrfs_read_qgroup_config or btrfs_quota_enable return non-zero, we've
already freed the fs_info->qgroup_ulist. The final btrfs_free_qgroup_config
called from quota_disable makes another ulist_free(fs_info->qgroup_ulist)
call.

We set fs_info->qgroup_ulist to NULL on the mentioned error paths, turning
the ulist_free in btrfs_free_qgroup_config into a noop.

Cc: Wang Shilong wangsl-f...@cn.fujitsu.com
Signed-off-by: Jan Schmidt list.bt...@jan-o-sch.net
---
 fs/btrfs/qgroup.c |8 ++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 74b432d..c6ce642 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -430,8 +430,10 @@ out:
}
btrfs_free_path(path);
 
-   if (ret  0)
+   if (ret  0) {
ulist_free(fs_info-qgroup_ulist);
+   fs_info-qgroup_ulist = NULL;
+   }
 
return ret  0 ? ret : 0;
 }
@@ -932,8 +934,10 @@ out_free_root:
kfree(quota_root);
}
 out:
-   if (ret)
+   if (ret) {
ulist_free(fs_info-qgroup_ulist);
+   fs_info-qgroup_ulist = NULL;
+   }
mutex_unlock(fs_info-qgroup_ioctl_lock);
return ret;
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/3] Btrfs: qgroup rescan fixes for next rc

2013-05-28 Thread Jan Schmidt
Hi Wang,

Please have a look at these patches, you should have been CCed but I just
realized git send-email doesn't care about Cc lines in the patch file. Sigh.

Thanks,
-Jan
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: set the free space control unit properly

2013-05-28 Thread Josef Bacik
Stefan pointed out that xfstests generic/013 was failing because the free space
cache checker was complaining with leafsize of 16k.  Turns out this is because
we were unconditionally using root->sectorsize as the free space ctl unit in the
kernel, which doesn't work out if leafsize != sectorsize.  This caused the
in-memory free space cache to get screwed up, which translated to a wrong space
cache on disk.  This patch fixes the problem by not carrying the sectorsize in
the block group since we have the ctl->unit, and we set the ctl->unit according
to the type of block group we are.  This made generic/013 pass with 16k
leafsize, whereas before it failed every single time.  Thanks,

Cc: sta...@vger.kernel.org
Reported-by: Stefan Behrens sbehr...@giantdisaster.de
Signed-off-by: Josef Bacik jba...@fusionio.com
---
 fs/btrfs/ctree.h|1 -
 fs/btrfs/extent-tree.c  |7 ++-
 fs/btrfs/free-space-cache.c |   32 +---
 fs/btrfs/free-space-cache.h |3 ++-
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fd62aa8..3442976 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1206,7 +1206,6 @@ struct btrfs_block_group_cache {
u64 reserved;
u64 bytes_super;
u64 flags;
-   u64 sectorsize;
u64 cache_generation;
 
/* for raid56, this is a full stripe, without parity */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4ec8305..f7af6a0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8128,11 +8128,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
key.objectid = found_key.objectid + found_key.offset;
btrfs_release_path(path);
cache-flags = btrfs_block_group_flags(cache-item);
-   cache-sectorsize = root-sectorsize;
cache-full_stripe_len = btrfs_full_stripe_len(root,
   root-fs_info-mapping_tree,
   found_key.objectid);
-   btrfs_init_free_space_ctl(cache);
+   btrfs_init_free_space_ctl(cache, root);
 
/*
 * We need to exclude the super stripes now so that the space
@@ -8283,7 +8282,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle 
*trans,
cache-key.objectid = chunk_offset;
cache-key.offset = size;
cache-key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
-   cache-sectorsize = root-sectorsize;
cache-fs_info = root-fs_info;
cache-full_stripe_len = btrfs_full_stripe_len(root,
   root-fs_info-mapping_tree,
@@ -8295,12 +8293,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle 
*trans,
INIT_LIST_HEAD(cache-cluster_list);
INIT_LIST_HEAD(cache-new_bg_list);
 
-   btrfs_init_free_space_ctl(cache);
-
btrfs_set_block_group_used(cache-item, bytes_used);
btrfs_set_block_group_chunk_objectid(cache-item, chunk_objectid);
cache-flags = type;
btrfs_set_block_group_flags(cache-item, type);
+   btrfs_init_free_space_ctl(cache, root);
 
cache-last_byte_to_unpin = (u64)-1;
cache-cached = BTRFS_CACHE_FINISHED;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 7517285..ec43e422 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1654,7 +1654,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
 * of cache left then go ahead an dadd them, no sense in adding
 * the overhead of a bitmap if we don't have to.
 */
-   if (info-bytes = block_group-sectorsize * 4) {
+   if (info-bytes = ctl-unit * 4) {
if (ctl-free_extents * 2 = ctl-extents_thresh)
return false;
} else {
@@ -2001,12 +2001,19 @@ void btrfs_dump_free_space(struct 
btrfs_block_group_cache *block_group,
   \n, count);
 }
 
-void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
+void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
+  struct btrfs_root *root)
 {
struct btrfs_free_space_ctl *ctl = block_group-free_space_ctl;
 
spin_lock_init(ctl-tree_lock);
-   ctl-unit = block_group-sectorsize;
+
+   /* This works for mixed block groups too since sectorsize == leafsize */
+   if (block_group-flags  (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_SYSTEM))
+   ctl-unit = root-leafsize;
+   else
+   ctl-unit = root-sectorsize;
ctl-start = block_group-key.objectid;
ctl-private = block_group;
ctl-op = free_space_op;
@@ -2548,10 +2555,10 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle 
*trans,

Re: [RFC PATCH v1 0/5] BTRFS hot relocation support

2013-05-28 Thread Kent Overstreet
On Tue, May 21, 2013 at 02:22:34AM +, Duncan wrote:
 zwu.kernel posted on Mon, 20 May 2013 23:11:22 +0800 as excerpted:
 
  The patchset is trying to introduce hot relocation support
  for BTRFS. In hybrid storage environment, when the data in rotating disk
  get hot, it can be relocated to nonrotating disk by BTRFS hot relocation
  support automatically; also, if nonrotating disk ratio exceed its upper
  threshold, the data which get cold can be looked up and relocated to
  rotating disk to make more space in nonrotating disk at first, and then
  the data which get hot will be relocated to nonrotating disk
  automatically.
 
 One advantage of a filesystem implementation, as opposed to bcache or 
 dmcache, is arguably a corner-case, but it's /my/ corner-case, so...
 
 I run an intr*-less (I guess technically, empty initramfs) monolithic-
 kernel boot, using the kernel commandline root= and (formerly) md= and 
 related logic to choose/assemble/mount root directly from the kernel 
 command line via bootloader (grub2).  Thus, any user-space-required-to-
 mount-root is out, since I don't have an initr* and thus no early 
 userspace.  That means both lvm2 and dmcache (AFAIK) are out.  I'm not 
 sure about bcache, but it has other negatives, particularly against btrfs-
 raid-1 and I'd guess md/raid-1 as well.
 
 Much like md before it, btrfs, while normally requiring the user-space-
 required device-scan to properly handle multiple devices, has kernel-
 command-line options that allow direct kernel multi-device assembly 
 without the help of early-userspace/initr*.

I wouldn't be averse to adding such functionality to bcache, provided it
could be done reasonably cleanly/sensibly. It's not high on my list but
I'd accept patches :)
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: nocow 'C' flag ignored after balance

2013-05-28 Thread Liu Bo
On Tue, May 28, 2013 at 09:22:11AM -0500, Kyle Gates wrote:
 From: Liu Bo bo.li@oracle.com
 
 Subject: [PATCH] Btrfs: fix broken nocow after a normal balance
 
[...]
 
 Sorry for the long wait in replying.
 This patch was unsuccessful in fixing the problem (on my 3.8 Ubuntu
 Raring kernel). I can probably try again on a newer version if you
 think it will help.
 This was my first kernel compile so I patched by hand and waited (10
 hours on my old 32 bit single core machine).
 
 I did move some of the files off and back on to the filesystem to
 start fresh and compare but all seem to exhibit the same behavior
 after a balance.


Thanks for testing the patch although it didn't help you.
Actually I tested it to be sure that it fixed the problems in my reproducer.

So anyway can you please apply this debug patch in order to nail it down?

thanks,
liubo

 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index df472ab..c12a11c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2857,8 +2857,12 @@ static noinline int check_committed_ref(struct 
btrfs_trans_handle *trans,
goto out;
 
if (btrfs_extent_generation(leaf, ei) =
-   btrfs_root_last_snapshot(root-root_item))
+   btrfs_root_last_snapshot(root-root_item)) {
+   printk(extent gen %llu last_snap %llu\n,
+   btrfs_extent_generation(leaf, ei),
+   btrfs_root_last_snapshot(root-root_item));
goto out;
+   }
 
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
if (btrfs_extent_inline_ref_type(leaf, iref) !=
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 23c596c..8cad6ee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1317,16 +1317,24 @@ next_slot:
goto out_check;
if (btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
-   btrfs_file_extent_other_encoding(leaf, fi))
+   btrfs_file_extent_other_encoding(leaf, fi)) {
+   printk(special encoding\n);
goto out_check;
-   if (extent_type == BTRFS_FILE_EXTENT_REG  !force)
+   }
+   if (extent_type == BTRFS_FILE_EXTENT_REG  !force) {
+   printk(BTRFS_FILE_EXTENT_REF\n);
goto out_check;
-   if (btrfs_extent_readonly(root, disk_bytenr))
+   }
+   if (btrfs_extent_readonly(root, disk_bytenr)) {
+   printk(ro\n);
goto out_check;
+   }
if (btrfs_cross_ref_exist(trans, root, ino,
  found_key.offset -
- extent_offset, disk_bytenr))
+ extent_offset, disk_bytenr)) {
+   printk(cross ref\n);
goto out_check;
+   }
disk_bytenr += extent_offset;
disk_bytenr += cur_offset - found_key.offset;
num_bytes = min(end + 1, extent_end) - cur_offset;

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: set the free space control unit properly

2013-05-28 Thread Miao Xie
On  tue, 28 May 2013 14:50:25 -0400, Josef Bacik wrote:
 Stefan pointed out that xfstests generic/013 was failing because the free 
 space
 cache checker was complaining with leafsize of 16k.  Turns out this is because
 we were unconditionally using root-sectorsize as the free space ctl unit in 
 the
 kernel, which doesn't work out if leafsize != sectorsize.  This caused the in
 memory free space cache to get screwed up which translated to a wrong space
 cache on disk.  This patch fixes the problem by not carrying the sectorsize in
 the block group since we have the ctl-unit, and we set the ctl-unit 
 according
 to the type of block group we are.  This made generic/013 pass with 16k
 leafsize, whereas before it failed every single time.  Thanks,

But this patch will cause old filesystems to be corrupted, because one bit in
them equals one sector (4K), not 16K.

Thanks
Miao

 
 Cc: sta...@vger.kernel.org
 Reported-by: Stefan Behrens sbehr...@giantdisaster.de
 Signed-off-by: Josef Bacik jba...@fusionio.com
 ---
  fs/btrfs/ctree.h|1 -
  fs/btrfs/extent-tree.c  |7 ++-
  fs/btrfs/free-space-cache.c |   32 +---
  fs/btrfs/free-space-cache.h |3 ++-
  4 files changed, 29 insertions(+), 14 deletions(-)
 
 diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
 index fd62aa8..3442976 100644
 --- a/fs/btrfs/ctree.h
 +++ b/fs/btrfs/ctree.h
 @@ -1206,7 +1206,6 @@ struct btrfs_block_group_cache {
   u64 reserved;
   u64 bytes_super;
   u64 flags;
 - u64 sectorsize;
   u64 cache_generation;
  
   /* for raid56, this is a full stripe, without parity */
 diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
 index 4ec8305..f7af6a0 100644
 --- a/fs/btrfs/extent-tree.c
 +++ b/fs/btrfs/extent-tree.c
 @@ -8128,11 +8128,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
   key.objectid = found_key.objectid + found_key.offset;
   btrfs_release_path(path);
   cache-flags = btrfs_block_group_flags(cache-item);
 - cache-sectorsize = root-sectorsize;
   cache-full_stripe_len = btrfs_full_stripe_len(root,
  root-fs_info-mapping_tree,
  found_key.objectid);
 - btrfs_init_free_space_ctl(cache);
 + btrfs_init_free_space_ctl(cache, root);
  
   /*
* We need to exclude the super stripes now so that the space
 @@ -8283,7 +8282,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle 
 *trans,
   cache-key.objectid = chunk_offset;
   cache-key.offset = size;
   cache-key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
 - cache-sectorsize = root-sectorsize;
   cache-fs_info = root-fs_info;
   cache-full_stripe_len = btrfs_full_stripe_len(root,
  root-fs_info-mapping_tree,
 @@ -8295,12 +8293,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle 
 *trans,
   INIT_LIST_HEAD(cache-cluster_list);
   INIT_LIST_HEAD(cache-new_bg_list);
  
 - btrfs_init_free_space_ctl(cache);
 -
   btrfs_set_block_group_used(cache-item, bytes_used);
   btrfs_set_block_group_chunk_objectid(cache-item, chunk_objectid);
   cache-flags = type;
   btrfs_set_block_group_flags(cache-item, type);
 + btrfs_init_free_space_ctl(cache, root);
  
   cache-last_byte_to_unpin = (u64)-1;
   cache-cached = BTRFS_CACHE_FINISHED;
 diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
 index 7517285..ec43e422 100644
 --- a/fs/btrfs/free-space-cache.c
 +++ b/fs/btrfs/free-space-cache.c
 @@ -1654,7 +1654,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
* of cache left then go ahead an dadd them, no sense in adding
* the overhead of a bitmap if we don't have to.
*/
 - if (info-bytes = block_group-sectorsize * 4) {
 + if (info-bytes = ctl-unit * 4) {
   if (ctl-free_extents * 2 = ctl-extents_thresh)
   return false;
   } else {
 @@ -2001,12 +2001,19 @@ void btrfs_dump_free_space(struct 
 btrfs_block_group_cache *block_group,
  \n, count);
  }
  
 -void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
 +void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
 +struct btrfs_root *root)
  {
   struct btrfs_free_space_ctl *ctl = block_group-free_space_ctl;
  
   spin_lock_init(ctl-tree_lock);
 - ctl-unit = block_group-sectorsize;
 +
 + /* This works for mixed block groups too since sectorsize == leafsize */
 + if (block_group-flags  (BTRFS_BLOCK_GROUP_METADATA |
 +   BTRFS_BLOCK_GROUP_SYSTEM))
 + ctl-unit = root-leafsize;
 + else
 + ctl-unit = root-sectorsize;
   ctl-start =