Hi Miao,
Chris' stress test, stress.sh -n 50 -c /mnt/linux-2.6 /mnt gave me another
lockdep splat
(see below). I applied your V5 patches on top of the next-rc branch.
I haven't triggered it in my actual testing, but do you think we can iterate a
list of block groups in a lockless manner using RCU?
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2164296..f40ff4e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -740,6 +740,7 @@ struct btrfs_space_info {
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
spinlock_t lock;
struct rw_semaphore groups_sem;
+ struct srcu_struct groups_srcu;
atomic_t caching_threads;
};
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9e4c9f4..22d6dbb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3003,6 +3003,7 @@ static int update_space_info(struct btrfs_fs_info *info,
u64 flags,
 	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
 		INIT_LIST_HEAD(&found->block_groups[i]);
 	init_rwsem(&found->groups_sem);
+	init_srcu_struct(&found->groups_srcu);
 	spin_lock_init(&found->lock);
 	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
BTRFS_BLOCK_GROUP_SYSTEM |
@@ -4853,6 +4854,7 @@ static noinline int find_free_extent(struct
btrfs_trans_handle *trans,
int data)
{
int ret = 0;
+ int idx;
 	struct btrfs_root *root = orig_root->fs_info->extent_root;
struct btrfs_free_cluster *last_ptr = NULL;
struct btrfs_block_group_cache *block_group = NULL;
@@ -4929,7 +4931,7 @@ ideal_cache:
 	if (block_group && block_group_bits(block_group, data) &&
 	    (block_group->cached != BTRFS_CACHE_NO ||
 	     search_start == ideal_cache_offset)) {
-		down_read(&space_info->groups_sem);
+		idx = srcu_read_lock(&space_info->groups_srcu);
 		if (list_empty(&block_group->list) ||
 		    block_group->ro) {
/*
@@ -4939,7 +4941,7 @@ ideal_cache:
* valid
*/
btrfs_put_block_group(block_group);
-			up_read(&space_info->groups_sem);
+			srcu_read_unlock(&space_info->groups_srcu, idx);
} else {
index = get_block_group_index(block_group);
goto have_block_group;
@@ -4949,8 +4951,8 @@ ideal_cache:
}
}
search:
-	down_read(&space_info->groups_sem);
-	list_for_each_entry(block_group, &space_info->block_groups[index],
+	idx = srcu_read_lock(&space_info->groups_srcu);
+	list_for_each_entry_rcu(block_group, &space_info->block_groups[index],
list) {
u64 offset;
int cached;
@@ -5197,8 +5199,8 @@ loop:
BUG_ON(index != get_block_group_index(block_group));
btrfs_put_block_group(block_group);
}
-	up_read(&space_info->groups_sem);
-
+	srcu_read_unlock(&space_info->groups_srcu, idx);
+
 	if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
goto search;
=========================================================
[ INFO: possible irq lock inversion dependency detected ]
2.6.36-v5+ #2
---------------------------------------------------------
kswapd0/49 just changed the state of lock:
 (&delayed_node->mutex){+.+.-.}, at: [<ffffffff812131f7>]
btrfs_remove_delayed_node+0x3e/0xd2
but this lock took another, RECLAIM_FS-READ-unsafe lock in the past:
 (&found->groups_sem){.+}
and interrupts could create inverse lock ordering between them.
other info that might help us debug this:
2 locks held by kswapd0/49:
#0:  (shrinker_rwsem){..}, at: [<ffffffff810e242a>] shrink_slab+0x3d/0x164
#1:  (iprune_sem){.-}, at: [<ffffffff811316d0>]
shrink_icache_memory+0x4d/0x213
the shortest dependencies between 2nd lock and 1st lock:
-> (&found->groups_sem){.+} ops: 1334 {
HARDIRQ-ON-W at:
                     [<ffffffff81075ec0>] __lock_acquire+0x346/0xda6
                     [<ffffffff81076a3d>] lock_acquire+0x11d/0x143
                     [<ffffffff814c6a2a>] down_write+0x55/0x9b
                     [<ffffffff811c352a>] __link_block_group+0x5a/0x83
                     [<ffffffff811ca562>] btrfs_read_block_groups+0x2fb/0x56c
                     [<ffffffff811d4921>] open_ctree+0xf78/0x14ab
                     [<ffffffff811bafdf>] btrfs_get_sb+0x236/0x467
                     [<ffffffff8111f25e>] vfs_kern_mount+0xbd/0x1a7
                     [<ffffffff8111f3b0>] do_kern_mount+0x4d/0xed
                     [<ffffffff8113668d>] do_mount+0x74e/0x7c5
                     [<ffffffff8113678c>] sys_mount+0x88/0xc2