Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-27 Thread 钱凯
I'm a little confused about what avg_delayed_ref_runtime means.

In __btrfs_run_delayed_refs(), avg_delayed_ref_runtime is set to the
runtime of all delayed refs processed in current transaction commit.
However, in btrfs_should_throttle_delayed_refs(), we rely on the
following condition to decide whether to throttle refs or not:
*
   avg_runtime = fs_info->avg_delayed_ref_runtime;
   if (num_entries * avg_runtime >= NSEC_PER_SEC)
   return 1;
*
It looks like avg_delayed_ref_runtime is used as the average runtime of
each delayed ref processed here. So what does it really mean?

Thanks,
Kai

2014-01-24 2:07 GMT+08:00 Josef Bacik jba...@fb.com:
 On one of our gluster clusters we noticed some pretty big lag spikes.  This
 turned out to be because our transaction commit was taking like 3 minutes to
 complete.  This is because we have like 30 gigs of metadata, so our global
 reserve would end up being the max which is like 512 mb.  So our throttling 
 code
 would allow a ridiculous amount of delayed refs to build up and then they'd 
 all
 get run at transaction commit time, and for a cold mounted file system that
 could take up to 3 minutes to run.  So fix the throttling to be based on both
 the size of the global reserve and how long it takes us to run delayed refs.
 This patch tracks the time it takes to run delayed refs and then only allows 1
 second's worth of outstanding delayed refs at a time.  This way it will 
 auto-tune
 itself from cold cache up to when everything is in memory and it no longer has
 to go to disk.  This makes our transaction commits take much less time to run.
 Thanks,

 Signed-off-by: Josef Bacik jba...@fb.com
 ---
  fs/btrfs/ctree.h   |  3 +++
  fs/btrfs/disk-io.c |  2 +-
  fs/btrfs/extent-tree.c | 41 -
  fs/btrfs/transaction.c |  4 ++--
  4 files changed, 46 insertions(+), 4 deletions(-)

 diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
 index 3cebb4a..ca6bcc3 100644
 --- a/fs/btrfs/ctree.h
 +++ b/fs/btrfs/ctree.h
 @@ -1360,6 +1360,7 @@ struct btrfs_fs_info {

 u64 generation;
 u64 last_trans_committed;
 +   u64 avg_delayed_ref_runtime;

 /*
  * this is updated to the current trans every time a full commit
 @@ -3172,6 +3173,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct 
 btrfs_root *root,

  int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
 +int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
 +  struct btrfs_root *root);
  void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
  int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
 diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
 index ed23127..f0e7bbe 100644
 --- a/fs/btrfs/disk-io.c
 +++ b/fs/btrfs/disk-io.c
 @@ -2185,7 +2185,7 @@ int open_ctree(struct super_block *sb,
 fs_info->free_chunk_space = 0;
 fs_info->tree_mod_log = RB_ROOT;
 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
 -
 +   fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
 /* readahead state */
 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
 spin_lock_init(&fs_info->reada_lock);
 diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
 index c77156c..b532259 100644
 --- a/fs/btrfs/extent-tree.c
 +++ b/fs/btrfs/extent-tree.c
 @@ -2322,8 +2322,10 @@ static noinline int __btrfs_run_delayed_refs(struct 
 btrfs_trans_handle *trans,
 struct btrfs_delayed_ref_head *locked_ref = NULL;
 struct btrfs_delayed_extent_op *extent_op;
 struct btrfs_fs_info *fs_info = root->fs_info;
 +   ktime_t start = ktime_get();
 int ret;
 unsigned long count = 0;
 +   unsigned long actual_count = 0;
 int must_insert_reserved = 0;

 delayed_refs = &trans->transaction->delayed_refs;
 @@ -2452,6 +2454,7 @@ static noinline int __btrfs_run_delayed_refs(struct 
 btrfs_trans_handle *trans,
  &delayed_refs->href_root);
 spin_unlock(&delayed_refs->lock);
 } else {
 +   actual_count++;
 ref->in_tree = 0;
 rb_erase(&ref->rb_node, &locked_ref->ref_root);
 }
 @@ -2502,6 +2505,26 @@ static noinline int __btrfs_run_delayed_refs(struct 
 btrfs_trans_handle *trans,
 count++;
 cond_resched();
 }
 +
 +   /*
 +* We don't want to include ref heads since we can have empty ref 
 heads
 +* and those will drastically skew our runtime down since we just do
 +* accounting, no actual extent tree updates.
 +*/
 +   if 

Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-27 Thread Josef Bacik
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 02/27/2014 10:38 AM, 钱凯 wrote:
 I'm a little confused of what avg_delayed_ref_runtime means.
 
 In __btrfs_run_delayed_refs(), avg_delayed_ref_runtime is set to
 the runtime of all delayed refs processed in current transaction
 commit. However, in btrfs_should_throttle_delayed_refs(), we based
 on the following condition to decide whether throttle refs or not: 
 * avg_runtime =
 fs_info-avg_delayed_ref_runtime; if (num_entries * avg_runtime =
 NSEC_PER_SEC) return 1; * 
 It looks like avg_delayed_ref_runtime is used as runtime of each 
 delayed ref processed in average here. So what does it really
 means?
 

Yeah I screwed this up, I should have been dividing the total time by
the number of delayed refs I ran.  I have a patch locally to fix it
and I'll send it out after I finish my qgroup work.  Thanks,

Josef

-BEGIN PGP SIGNATURE-
Version: GnuPG v1
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQIcBAEBAgAGBQJTD2AlAAoJEANb+wAKly3BQkEP/0F/LGGDsO+x63SAFh/apRZo
ZVmzi1yJGiArFImFs8IwZHKgr/HpP9yYYFqyDCTSYrErI32bjpPbSDKlFDiIKYBq
6mTptPlC6AJQcMJf3oV2SqUoQxI6Ea+04QaTtZwE5pDaTZsjD47QYfSyw/i+YwOr
Ds11ayDeU3FSj8JVYDKFg5ZBifv/mIHbh1fb8xc4R5XCWsbRzIL9LiQa9c56EEOq
vzXp57TIetbJdliK0cYQtPkA7R40us8TqVBH5MfcZPgITyBun3e0zrGxWmW6caTs
viejEbqDhyHLHCing+mMI6GX7w16duq5oG+w4nnjjyuMzWAyNN2pxloqQsWwOyv8
7+33JZCtVG/txRMIXkvc3bqzetrUyPAruo+M3pstN7B2dph6TDV0QJSFnxee6mKf
4/zseNOJtQqjHe5QJNcVJtkDaxgGBkSONHLm5Gz8rFU3XKcNZQcocV+0EtIjE7Zs
D5oDYCAyrxG1VKoFWhdaS883PDokRr75jcnFui4GhhFr5OAOdS3OOTLKVizWUag1
O11d9XsjnzLWiVTsZH+f4K0ONQcUwJFV0zADgYsXtU2LDHHNIPZX9+qSAa+L66hT
Ki6hocoZ4cXyGWcTZPtlGHxAmV2kEh8/Tr1ePfwy7FzTrg9hWUGLXY0DliQDPmIB
w3TdOa+Ghjl8dcaGc2rX
=kSsY
-END PGP SIGNATURE-
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-15 Thread Johannes Hirte
On Fri, 14 Feb 2014 14:29:35 -0500
Josef Bacik jba...@fb.com wrote:

 -BEGIN PGP SIGNED MESSAGE-
 Hash: SHA1
 
 
 
 On 02/14/2014 02:25 PM, Johannes Hirte wrote:
  On Thu, 6 Feb 2014 16:19:46 -0500 Josef Bacik jba...@fb.com
  wrote:
  
  Ok so I thought I reproduced the problem but I just reproduced a 
  different problem.  Please undo any changes you've made and
  apply this patch and reproduce and then provide me with any debug
  output that gets spit out.  I'm sending this via thunderbird with
  6 different extensions to make sure it comes out right so if it
  doesn't work let me know and I'll just paste it somewhere.
  Thanks,
  
  Sorry for the long delay. Was too busy last week.
  
 
 Ok perfect this is fixed by
 
 [PATCH] Btrfs: don't loop forever if we can't run because of the tree
 mod log
 
 and it went into -rc2 iirc, so give that a whirl and make sure it
 fixes your problem.  Thanks,

Yes, seems to be fixed now. I wasn't able to reproduce it anymore.

regards,
  Johannes
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-14 Thread Johannes Hirte
On Thu, 6 Feb 2014 16:19:46 -0500
Josef Bacik jba...@fb.com wrote:

 Ok so I thought I reproduced the problem but I just reproduced a
 different problem.  Please undo any changes you've made and apply
 this patch and reproduce and then provide me with any debug output
 that gets spit out.  I'm sending this via thunderbird with 6
 different extensions to make sure it comes out right so if it doesn't
 work let me know and I'll just paste it somewhere.  Thanks,

Sorry for the long delay. Was too busy last week.

Here is the output:

[   25.240971] looped a lot, count 14, nr 32, no_selected_ref 99986
[   25.267639] looped a lot, count 14, nr 32, no_selected_ref 199987
[   25.294308] looped a lot, count 14, nr 32, no_selected_ref 299988
[   25.320605] looped a lot, count 14, nr 32, no_selected_ref 399989
[   25.346639] looped a lot, count 14, nr 32, no_selected_ref 499990
[   25.372517] looped a lot, count 14, nr 32, no_selected_ref 599991
[   25.398924] looped a lot, count 14, nr 32, no_selected_ref 699992
[   25.425443] looped a lot, count 14, nr 32, no_selected_ref 799993
[   25.451344] looped a lot, count 14, nr 32, no_selected_ref 899994
[   25.477350] looped a lot, count 14, nr 32, no_selected_ref 999995
[   25.503069] looped a lot, count 14, nr 32, no_selected_ref 1099996
[   25.529372] looped a lot, count 14, nr 32, no_selected_ref 1199997
[   25.555549] looped a lot, count 14, nr 32, no_selected_ref 1299998
[   25.581418] looped a lot, count 14, nr 32, no_selected_ref 1399999
[   25.607514] looped a lot, count 14, nr 32, no_selected_ref 1500000
[   25.633794] looped a lot, count 14, nr 32, no_selected_ref 1600001
[   25.659699] looped a lot, count 14, nr 32, no_selected_ref 1700002
[   25.686095] looped a lot, count 14, nr 32, no_selected_ref 1800003
[   25.711906] looped a lot, count 14, nr 32, no_selected_ref 1900004
[   25.752255] looped a lot, count 14, nr 32, no_selected_ref 2000005
[   25.788077] looped a lot, count 0, nr 32, no_selected_ref 100000
[   25.811966] looped a lot, count 14, nr 32, no_selected_ref 2100006
[  360.749227] looped a lot, count 8, nr 32, no_selected_ref 99992
[  360.770434] looped a lot, count 8, nr 32, no_selected_ref 199993
[  360.792136] looped a lot, count 8, nr 32, no_selected_ref 299994
[  360.813571] looped a lot, count 8, nr 32, no_selected_ref 399995
[  360.834932] looped a lot, count 8, nr 32, no_selected_ref 499996
[  360.856085] looped a lot, count 8, nr 32, no_selected_ref 599997
[  360.877374] looped a lot, count 8, nr 32, no_selected_ref 699998
[  360.899455] looped a lot, count 8, nr 32, no_selected_ref 799999
[  360.921175] looped a lot, count 8, nr 32, no_selected_ref 900000
[  360.942409] looped a lot, count 8, nr 32, no_selected_ref 1000001
[  360.963800] looped a lot, count 8, nr 32, no_selected_ref 1100002
[  360.985397] looped a lot, count 8, nr 32, no_selected_ref 1200003
[  361.007148] looped a lot, count 8, nr 32, no_selected_ref 1300004
[  361.028789] looped a lot, count 8, nr 32, no_selected_ref 1400005
[  361.050564] looped a lot, count 8, nr 32, no_selected_ref 1500006
[  361.072008] looped a lot, count 8, nr 32, no_selected_ref 1600007
[  361.093269] looped a lot, count 8, nr 32, no_selected_ref 1700008
[  361.114645] looped a lot, count 8, nr 32, no_selected_ref 1800009
[  361.136099] looped a lot, count 8, nr 32, no_selected_ref 1900010
[  361.157566] looped a lot, count 8, nr 32, no_selected_ref 2000011
[  361.178969] looped a lot, count 8, nr 32, no_selected_ref 2100012
[  361.200397] looped a lot, count 8, nr 32, no_selected_ref 2200013
[  361.221980] looped a lot, count 8, nr 32, no_selected_ref 2300014
[  361.243435] looped a lot, count 8, nr 32, no_selected_ref 2400015
[  361.264777] looped a lot, count 8, nr 32, no_selected_ref 2500016
[  361.286518] looped a lot, count 8, nr 32, no_selected_ref 2600017
[  361.308240] looped a lot, count 8, nr 32, no_selected_ref 2700018
[  361.329850] looped a lot, count 8, nr 32, no_selected_ref 2800019
[  361.351420] looped a lot, count 8, nr 32, no_selected_ref 2900020
[  361.372633] looped a lot, count 8, nr 32, no_selected_ref 3000021
[  361.394330] looped a lot, count 8, nr 32, no_selected_ref 3100022
[  361.416039] looped a lot, count 8, nr 32, no_selected_ref 3200023
[  361.437659] looped a lot, count 8, nr 32, no_selected_ref 3300024
[  361.459181] looped a lot, count 8, nr 32, no_selected_ref 3400025
[  361.481058] looped a lot, count 8, nr 32, no_selected_ref 3500026
[  361.502441] looped a lot, count 8, nr 32, no_selected_ref 3600027
[  361.523964] looped a lot, count 8, nr 32, no_selected_ref 3700028
[  361.545387] looped a lot, count 8, nr 32, no_selected_ref 3800029
[  361.566717] looped a lot, count 8, nr 32, no_selected_ref 3900030
[  361.588079] looped a lot, count 8, nr 32, no_selected_ref 4000031
[  361.609673] looped a lot, count 8, nr 32, no_selected_ref 4100032
[  361.631028] looped a lot, count 8, nr 32, no_selected_ref 4200033
[  361.652498] looped a lot, count 8, nr 32, 

Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-14 Thread Josef Bacik
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1



On 02/14/2014 02:25 PM, Johannes Hirte wrote:
 On Thu, 6 Feb 2014 16:19:46 -0500 Josef Bacik jba...@fb.com
 wrote:
 
 Ok so I thought I reproduced the problem but I just reproduced a 
 different problem.  Please undo any changes you've made and
 apply this patch and reproduce and then provide me with any debug
 output that gets spit out.  I'm sending this via thunderbird with
 6 different extensions to make sure it comes out right so if it
 doesn't work let me know and I'll just paste it somewhere.
 Thanks,
 
 Sorry for the long delay. Was too busy last week.
 

Ok perfect this is fixed by

[PATCH] Btrfs: don't loop forever if we can't run because of the tree
mod log

and it went into -rc2 iirc, so give that a whirl and make sure it
fixes your problem.  Thanks,

Josef
-BEGIN PGP SIGNATURE-
Version: GnuPG v1
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQIcBAEBAgAGBQJS/m6fAAoJEANb+wAKly3B73IP/052xDlBavgP5GTMhwnn2+yA
fY862NUlwQbb+5MlMi1DseG0lBp1/j0M8XkMq/F0btZSrAJcem+mZPSfeHHbYoxG
4kO5pjXQY3ha1Wj8Lc30HqF2hGGIIfr9zOyNq1d7t/w2wXXi84VkwRJkBlZWHROy
RjoK2eKv94MJtMnL4FRxew4Pkvg2y+kqnZeaL6DL84fno6wPIqf09RXwy6i5AZMD
AuOpbs5HFkQC2tb/C1ZvWZibDSXeI/nvQPDFMaFPtD4vRLT1KdpxceNErNtMGDTK
D6YmD+XYdFkg9kNPvgeRQOPyhcdEPWvUI5mWC6lRmQu/CK+7Qf5HPoHbHr+vZB1m
IwvO34bzUVDLAHkr9kCP4+QAz+GDm7LuhvFcc2uhaZqlLYZzTszG/HqXCNBx86+f
Y8RjJvSmU+j23bQlvso1FsHUP5d0ihUaEtU+FvG0mCtFMb3gOOqTusEEH0k2x0rD
SR12DCyR9nV/lSPXEtso+8Mtrkjarw76ZV7IJnZoAxOlHsK3vvuO1xNdJGxG45aV
k+hLuoXjuQtULydkkGPgQzfzd7s9Ol2NuvhezFjCF/0nC44UWtS4LcA1W41Xcy2M
3FeuKdWsBucvHwGAc/GSAS8U6oKvCAIUeFTD3Ui2OcXBDiMQYI9jPzGoBmyCnUVQ
gBiCLWxGejAMN8z2qfCZ
=7BA7
-END PGP SIGNATURE-
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-06 Thread Josef Bacik


On 02/05/2014 05:57 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 16:46:57 -0500
Josef Bacik jba...@fb.com wrote:


On 02/05/2014 04:42 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 14:36:39 -0500
Josef Bacik jba...@fb.com wrote:


On 02/05/2014 02:30 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 14:00:57 -0500
Josef Bacik jba...@fb.com wrote:


On 02/05/2014 12:34 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 10:49:15 -0500
Josef Bacik jba...@fb.com wrote:


Ok none of those make sense which makes me think it may be the
ktime bits, instead of un-applying the whole patch could you
just comment out the parts

 ktime_t start = ktime_get();

and

 if (actual_count  0) {
 u64 runtime =
ktime_to_ns(ktime_sub(ktime_get(), start)); u64 avg;

 /*
  * We weigh the current average higher than
our current runtime
  * to avoid large swings in the average.
  */
 spin_lock(delayed_refs-lock);
 avg = fs_info-avg_delayed_ref_runtime * 3
+ runtime; avg = div64_u64(avg, 4);
 fs_info-avg_delayed_ref_runtime = avg;
 spin_unlock(delayed_refs-lock);
 }

in __btrfs_run_delayed_refs and see if that makes the problem
stop? If it does will you try chris's for-linus branch to see
if it still reproduces there?  Maybe some patch changed
ktime_get() in -rc1 that is causing issues and we're just now
exposing it. Thanks,

With the ktime bits disabled, I wasn't able to reproduce the
problem anymore. With Chris' for-linus branch it took longer but
still appeared.


Ok can you send your .config, maybe there's some weird time bug
being exposed.  What kind of CPU do you have?  Thanks,

Josef

It's a Core i5-540M, dualcore + hyperthreading

Ok while I'm doing this can you change
btrfs_should_throttle_delayed_refs to _always_ return 1, still with
all the ktime stuff commented out, and see if that causes the
problem to happen?  Thanks,

Yes it does. Same behavior as without ktime stuff commented out.


Ok perfect, can you send me a btrfs fi df of that volume, and do you
have any snapshots or anything?  Thanks,

btrfs fi df /
Data, single: total=220.01GiB, used=210.85GiB
System, DUP: total=8.00MiB, used=32.00KiB
System, single: total=4.00MiB, used=0.00
Metadata, DUP: total=4.00GiB, used=2.93GiB
Metadata, single: total=8.00MiB, used=0.00

No snapshots but several subvolumes. / itself is a separate subvolume
and subvol 0 only contains the other subvolumes (5 at moment). qgroups
aren't enabled.

mount options are noatime,inode_cache, if this matters

I've managed to reproduce on one of my test boxes at work, I'll get to 
the bottom of this.  Thanks,


Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-06 Thread Josef Bacik
On 02/05/2014 05:57 PM, Johannes Hirte wrote:
 On Wed, 5 Feb 2014 16:46:57 -0500
 Josef Bacik jba...@fb.com wrote:
 

 On 02/05/2014 04:42 PM, Johannes Hirte wrote:
 On Wed, 5 Feb 2014 14:36:39 -0500
 Josef Bacik jba...@fb.com wrote:

 On 02/05/2014 02:30 PM, Johannes Hirte wrote:
 On Wed, 5 Feb 2014 14:00:57 -0500
 Josef Bacik jba...@fb.com wrote:

 On 02/05/2014 12:34 PM, Johannes Hirte wrote:
 On Wed, 5 Feb 2014 10:49:15 -0500
 Josef Bacik jba...@fb.com wrote:

 Ok none of those make sense which makes me think it may be the
 ktime bits, instead of un-applying the whole patch could you
 just comment out the parts

  ktime_t start = ktime_get();

 and

  if (actual_count  0) {
  u64 runtime =
 ktime_to_ns(ktime_sub(ktime_get(), start)); u64 avg;

  /*
   * We weigh the current average higher than
 our current runtime
   * to avoid large swings in the average.
   */
  spin_lock(delayed_refs-lock);
  avg = fs_info-avg_delayed_ref_runtime * 3
 + runtime; avg = div64_u64(avg, 4);
  fs_info-avg_delayed_ref_runtime = avg;
  spin_unlock(delayed_refs-lock);
  }

 in __btrfs_run_delayed_refs and see if that makes the problem
 stop? If it does will you try chris's for-linus branch to see
 if it still reproduces there?  Maybe some patch changed
 ktime_get() in -rc1 that is causing issues and we're just now
 exposing it. Thanks,
 With the ktime bits disabled, I wasn't able to reproduce the
 problem anymore. With Chris' for-linus branch it took longer but
 still appeared.

 Ok can you send your .config, maybe there's some weird time bug
 being exposed.  What kind of CPU do you have?  Thanks,

 Josef
 It's a Core i5-540M, dualcore + hyperthreading
 Ok while I'm doing this can you change
 btrfs_should_throttle_delayed_refs to _always_ return 1, still with
 all the ktime stuff commented out, and see if that causes the
 problem to happen?  Thanks,
 Yes it does. Same behavior as without ktime stuff commented out.

 Ok perfect, can you send me a btrfs fi df of that volume, and do you
 have any snapshots or anything?  Thanks,
 
 btrfs fi df /
 Data, single: total=220.01GiB, used=210.85GiB
 System, DUP: total=8.00MiB, used=32.00KiB
 System, single: total=4.00MiB, used=0.00
 Metadata, DUP: total=4.00GiB, used=2.93GiB
 Metadata, single: total=8.00MiB, used=0.00
 
 No snapshots but several subvolumes. / itself is a separate subvolume
 and subvol 0 only contains the other subvolumes (5 at moment). qgroups
 aren't enabled.
 
 mount options are noatime,inode_cache, if this matters
 

Ok so I thought I reproduced the problem but I just reproduced a different
problem.  Please undo any changes you've made and apply this patch and reproduce
and then provide me with any debug output that gets spit out.  I'm sending this
via thunderbird with 6 different extensions to make sure it comes out right so
if it doesn't work let me know and I'll just paste it somewhere.  Thanks,

Josef

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index f3bff89..b025a04 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -204,8 +204,12 @@ find_ref_head(struct rb_root *root, u64 bytenr,
struct rb_node *n;
struct btrfs_delayed_ref_head *entry;
int cmp = 0;
+   unsigned long loops = 0;
 
 again:
+   loops++;
+   if (loops > 2)
+   printk(KERN_ERR "we have fucked up\n");
n = root->rb_node;
entry = NULL;
while (n) {
@@ -232,6 +236,7 @@ again:
n = rb_next(&entry->href_node);
if (!n)
n = rb_first(root);
+   BUG_ON(!n);
entry = rb_entry(n, struct btrfs_delayed_ref_head,
 href_node);
bytenr = entry->node.bytenr;
@@ -410,10 +415,14 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans)
struct btrfs_delayed_ref_head *head;
u64 start;
bool loop = false;
+   unsigned long loops = 0;
 
delayed_refs = &trans->transaction->delayed_refs;
 
 again:
+   loops++;
+   if (loops > 5)
+   printk(KERN_ERR "houston we have a problem\n");
start = delayed_refs->run_delayed_start;
head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
if (!head && !loop) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9c9ecc9..91dacf4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2327,9 +2327,16 @@ static noinline int __btrfs_run_delayed_refs(struct 
btrfs_trans_handle *trans,
unsigned long count = 0;
unsigned long actual_count = 0;
int must_insert_reserved = 0;
+   unsigned long loops = 0;
+   unsigned long no_selected_ref = 0;
 
delayed_refs = 

Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-05 Thread Johannes Hirte
On Tue, 4 Feb 2014 09:12:54 -0500
Josef Bacik jba...@fb.com wrote:

 Hrm I was hoping that was going to be more helpful.  Can you get perf 
 record -ag and then perf report while it's at full cpu and get the
 first 3 or 4 things with their traces?

Here it comes:

# 
# captured on: Wed Feb  5 00:11:41 2014
# 
#
no symbols found in /usr/sbin/acpid, maybe install a debug package?
unexpected end of event stream
# Samples: 168K of event 'cycles'   


# Event count (approx.): 126847081763   


#   


# Overhead  Command   Shared Object 
  Symbol

#   ...  ..  
...

#   


18.48%  btrfs-freespace  [kernel.kallsyms]   [k] state_store


|
--- state_store

10.25%  btrfs-freespace  [kernel.kallsyms]   [k] 
sys_sched_rr_get_interval   
   
|
--- sys_sched_rr_get_interval

 9.02%  btrfs-freespace  [kernel.kallsyms]   [k] 
rt_mutex_slowunlock 
   
|
--- rt_mutex_slowunlock

 8.76%  btrfs-freespace  [kernel.kallsyms]   [k] 
btrfs_submit_compressed_write   
   
|
--- btrfs_submit_compressed_write

 6.63%  btrfs-freespace  [kernel.kallsyms]   [k] sched_show_task

|
--- sched_show_task

 5.19%  btrfs-freespace  [kernel.kallsyms]   [k] find_free_extent   

|
--- find_free_extent

 5.15%  btrfs-freespace  [kernel.kallsyms]   [k] 
trace_print_graph_duration  
   
|
--- trace_print_graph_duration

 I'm going to try and
 reproduce today, is there anything special about your fs?
 Compression, large blocksizes, skinny metadata?  Thanks,

Filesystem was created with -l 32768 -n 32768 and skinny metadata enabled.

regards,
  Johannes
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-05 Thread Josef Bacik


On 02/05/2014 03:14 AM, Johannes Hirte wrote:

On Tue, 4 Feb 2014 09:12:54 -0500
Josef Bacik jba...@fb.com wrote:


Hrm I was hoping that was going to be more helpful.  Can you get perf
record -ag and then perf report while it's at full cpu and get the
first 3 or 4 things with their traces?

Here it comes:

# 
# captured on: Wed Feb  5 00:11:41 2014
# 
#
no symbols found in /usr/sbin/acpid, maybe install a debug package?
unexpected end of event stream
# Samples: 168K of event 'cycles'
# Event count (approx.): 126847081763
#
# Overhead  Command   Shared Object 
  Symbol
#   ...  ..  
...
#
 18.48%  btrfs-freespace  [kernel.kallsyms]   [k] state_store
 |
 --- state_store

 10.25%  btrfs-freespace  [kernel.kallsyms]   [k] 
sys_sched_rr_get_interval
 |
 --- sys_sched_rr_get_interval

  9.02%  btrfs-freespace  [kernel.kallsyms]   [k] 
rt_mutex_slowunlock
 |
 --- rt_mutex_slowunlock

  8.76%  btrfs-freespace  [kernel.kallsyms]   [k] 
btrfs_submit_compressed_write
 |
 --- btrfs_submit_compressed_write

  6.63%  btrfs-freespace  [kernel.kallsyms]   [k] sched_show_task
 |
 --- sched_show_task

  5.19%  btrfs-freespace  [kernel.kallsyms]   [k] find_free_extent
 |
 --- find_free_extent

  5.15%  btrfs-freespace  [kernel.kallsyms]   [k] 
trace_print_graph_duration
 |
 --- trace_print_graph_duration


I'm going to try and
reproduce today, is there anything special about your fs?
Compression, large blocksizes, skinny metadata?  Thanks,

Filesystem was created with -l 32768 -n 32768 and skinny metadata enabled.

Ok none of those make sense which makes me think it may be the ktime 
bits, instead of un-applying the whole patch could you just comment out 
the parts


ktime_t start = ktime_get();

and

if (actual_count > 0) {
	u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
	u64 avg;

	/*
	 * We weigh the current average higher than our current runtime
	 * to avoid large swings in the average.
	 */
	spin_lock(&delayed_refs->lock);
	avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
	avg = div64_u64(avg, 4);
	fs_info->avg_delayed_ref_runtime = avg;
	spin_unlock(&delayed_refs->lock);
}

in __btrfs_run_delayed_refs and see if that makes the problem stop? If 
it does will you try chris's for-linus branch to see if it still 
reproduces there?  Maybe some patch changed ktime_get() in -rc1 that is 
causing issues and we're just now exposing it.  Thanks,


Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-05 Thread Johannes Hirte
On Wed, 5 Feb 2014 10:49:15 -0500
Josef Bacik jba...@fb.com wrote:

 Ok none of those make sense which makes me think it may be the ktime 
 bits, instead of un-applying the whole patch could you just comment
 out the parts
 
  ktime_t start = ktime_get();
 
 and
 
  if (actual_count  0) {
  u64 runtime = ktime_to_ns(ktime_sub(ktime_get(),
 start)); u64 avg;
 
  /*
   * We weigh the current average higher than our
 current runtime
   * to avoid large swings in the average.
   */
  spin_lock(delayed_refs-lock);
  avg = fs_info-avg_delayed_ref_runtime * 3 + runtime;
  avg = div64_u64(avg, 4);
  fs_info-avg_delayed_ref_runtime = avg;
  spin_unlock(delayed_refs-lock);
  }
 
 in __btrfs_run_delayed_refs and see if that makes the problem stop?
 If it does will you try chris's for-linus branch to see if it still 
 reproduces there?  Maybe some patch changed ktime_get() in -rc1 that
 is causing issues and we're just now exposing it.  Thanks,

With the ktime bits disabled, I wasn't able to reproduce the
problem anymore. With Chris' for-linus branch it took longer but still
appeared.

regards,
  Johannes
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-05 Thread Josef Bacik


On 02/05/2014 12:34 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 10:49:15 -0500
Josef Bacik jba...@fb.com wrote:


Ok none of those make sense which makes me think it may be the ktime
bits, instead of un-applying the whole patch could you just comment
out the parts

  ktime_t start = ktime_get();

and

  if (actual_count  0) {
  u64 runtime = ktime_to_ns(ktime_sub(ktime_get(),
start)); u64 avg;

  /*
   * We weigh the current average higher than our
current runtime
   * to avoid large swings in the average.
   */
  spin_lock(delayed_refs-lock);
  avg = fs_info-avg_delayed_ref_runtime * 3 + runtime;
  avg = div64_u64(avg, 4);
  fs_info-avg_delayed_ref_runtime = avg;
  spin_unlock(delayed_refs-lock);
  }

in __btrfs_run_delayed_refs and see if that makes the problem stop?
If it does will you try chris's for-linus branch to see if it still
reproduces there?  Maybe some patch changed ktime_get() in -rc1 that
is causing issues and we're just now exposing it.  Thanks,

With the ktime bits disabled, I wasn't able to reproduce the
problem anymore. With Chris' for-linus branch it took longer but still
appeared.

Ok can you send your .config, maybe there's some weird time bug being 
exposed.  What kind of CPU do you have?  Thanks,


Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-05 Thread Josef Bacik


On 02/05/2014 02:30 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 14:00:57 -0500
Josef Bacik jba...@fb.com wrote:


On 02/05/2014 12:34 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 10:49:15 -0500
Josef Bacik jba...@fb.com wrote:


Ok none of those make sense which makes me think it may be the
ktime bits, instead of un-applying the whole patch could you just
comment out the parts

   ktime_t start = ktime_get();

and

   if (actual_count  0) {
   u64 runtime = ktime_to_ns(ktime_sub(ktime_get(),
start)); u64 avg;

   /*
* We weigh the current average higher than our
current runtime
* to avoid large swings in the average.
*/
   spin_lock(delayed_refs-lock);
   avg = fs_info-avg_delayed_ref_runtime * 3 +
runtime; avg = div64_u64(avg, 4);
   fs_info-avg_delayed_ref_runtime = avg;
   spin_unlock(delayed_refs-lock);
   }

in __btrfs_run_delayed_refs and see if that makes the problem stop?
If it does will you try chris's for-linus branch to see if it still
reproduces there?  Maybe some patch changed ktime_get() in -rc1
that is causing issues and we're just now exposing it.  Thanks,

With the ktime bits disabled, I wasn't able to reproduce the
problem anymore. With Chris' for-linus branch it took longer but
still appeared.


Ok can you send your .config, maybe there's some weird time bug being
exposed.  What kind of CPU do you have?  Thanks,

Josef

It's a Core i5-540M, dualcore + hyperthreading
Ok while I'm doing this can you change 
btrfs_should_throttle_delayed_refs to _always_ return 1, still with all 
the ktime stuff commented out, and see if that causes the problem to 
happen?  Thanks,


Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-05 Thread Johannes Hirte
On Wed, 5 Feb 2014 14:36:39 -0500
Josef Bacik jba...@fb.com wrote:

 
 On 02/05/2014 02:30 PM, Johannes Hirte wrote:
  On Wed, 5 Feb 2014 14:00:57 -0500
  Josef Bacik jba...@fb.com wrote:
 
  On 02/05/2014 12:34 PM, Johannes Hirte wrote:
  On Wed, 5 Feb 2014 10:49:15 -0500
  Josef Bacik jba...@fb.com wrote:
 
  Ok none of those make sense which makes me think it may be the
  ktime bits, instead of un-applying the whole patch could you just
  comment out the parts
 
 ktime_t start = ktime_get();
 
  and
 
 if (actual_count > 0) {
     u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
     u64 avg;
 
     /*
      * We weigh the current average higher than our current runtime
      * to avoid large swings in the average.
      */
     spin_lock(&delayed_refs->lock);
     avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
     avg = div64_u64(avg, 4);
     fs_info->avg_delayed_ref_runtime = avg;
     spin_unlock(&delayed_refs->lock);
 }
 
  in __btrfs_run_delayed_refs and see if that makes the problem
  stop? If it does will you try chris's for-linus branch to see if
  it still reproduces there?  Maybe some patch changed ktime_get()
  in -rc1 that is causing issues and we're just now exposing it.
  Thanks,
  With the ktime bits disabled, I wasn't able to reproduce the
  problem anymore. With Chris' for-linus branch it took longer but
  still appeared.
 
  Ok can you send your .config, maybe there's some weird time bug
  being exposed.  What kind of CPU do you have?  Thanks,
 
  Josef
  It's a Core i5-540M, dualcore + hyperthreading
 Ok while I'm doing this can you change 
 btrfs_should_throttle_delayed_refs to _always_ return 1, still with
 all the ktime stuff commented out, and see if that causes the problem
 to happen?  Thanks,

Yes it does. Same behavior as without ktime stuff commented out.

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-05 Thread Josef Bacik


On 02/05/2014 04:42 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 14:36:39 -0500
Josef Bacik jba...@fb.com wrote:


On 02/05/2014 02:30 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 14:00:57 -0500
Josef Bacik jba...@fb.com wrote:


On 02/05/2014 12:34 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 10:49:15 -0500
Josef Bacik jba...@fb.com wrote:


Ok none of those make sense which makes me think it may be the
ktime bits, instead of un-applying the whole patch could you just
comment out the parts

ktime_t start = ktime_get();

and

if (actual_count > 0) {
    u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
    u64 avg;

    /*
     * We weigh the current average higher than our current runtime
     * to avoid large swings in the average.
     */
    spin_lock(&delayed_refs->lock);
    avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
    avg = div64_u64(avg, 4);
    fs_info->avg_delayed_ref_runtime = avg;
    spin_unlock(&delayed_refs->lock);
}

in __btrfs_run_delayed_refs and see if that makes the problem
stop? If it does will you try chris's for-linus branch to see if
it still reproduces there?  Maybe some patch changed ktime_get()
in -rc1 that is causing issues and we're just now exposing it.
Thanks,

With the ktime bits disabled, I wasn't able to reproduce the
problem anymore. With Chris' for-linus branch it took longer but
still appeared.


Ok can you send your .config, maybe there's some weird time bug
being exposed.  What kind of CPU do you have?  Thanks,

Josef

It's a Core i5-540M, dualcore + hyperthreading

Ok while I'm doing this can you change
btrfs_should_throttle_delayed_refs to _always_ return 1, still with
all the ktime stuff commented out, and see if that causes the problem
to happen?  Thanks,

Yes it does. Same behavior as without ktime stuff commented out.

Ok perfect, can you send me a btrfs fi df of that volume, and do you 
have any snapshots or anything?  Thanks,


Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-05 Thread Josef Bacik


On 02/05/2014 04:42 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 14:36:39 -0500
Josef Bacik jba...@fb.com wrote:


On 02/05/2014 02:30 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 14:00:57 -0500
Josef Bacik jba...@fb.com wrote:


On 02/05/2014 12:34 PM, Johannes Hirte wrote:

On Wed, 5 Feb 2014 10:49:15 -0500
Josef Bacik jba...@fb.com wrote:


Ok none of those make sense which makes me think it may be the
ktime bits, instead of un-applying the whole patch could you just
comment out the parts

ktime_t start = ktime_get();

and

if (actual_count > 0) {
    u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
    u64 avg;

    /*
     * We weigh the current average higher than our current runtime
     * to avoid large swings in the average.
     */
    spin_lock(&delayed_refs->lock);
    avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
    avg = div64_u64(avg, 4);
    fs_info->avg_delayed_ref_runtime = avg;
    spin_unlock(&delayed_refs->lock);
}

in __btrfs_run_delayed_refs and see if that makes the problem
stop? If it does will you try chris's for-linus branch to see if
it still reproduces there?  Maybe some patch changed ktime_get()
in -rc1 that is causing issues and we're just now exposing it.
Thanks,

With the ktime bits disabled, I wasn't able to reproduce the
problem anymore. With Chris' for-linus branch it took longer but
still appeared.


Ok can you send your .config, maybe there's some weird time bug
being exposed.  What kind of CPU do you have?  Thanks,

Josef

It's a Core i5-540M, dualcore + hyperthreading

Ok while I'm doing this can you change
btrfs_should_throttle_delayed_refs to _always_ return 1, still with
all the ktime stuff commented out, and see if that causes the problem
to happen?  Thanks,

Yes it does. Same behavior as without ktime stuff commented out.


Do you happen to have qgroups enabled?  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-05 Thread Johannes Hirte
On Wed, 5 Feb 2014 16:46:57 -0500
Josef Bacik jba...@fb.com wrote:

 
 On 02/05/2014 04:42 PM, Johannes Hirte wrote:
  On Wed, 5 Feb 2014 14:36:39 -0500
  Josef Bacik jba...@fb.com wrote:
 
  On 02/05/2014 02:30 PM, Johannes Hirte wrote:
  On Wed, 5 Feb 2014 14:00:57 -0500
  Josef Bacik jba...@fb.com wrote:
 
  On 02/05/2014 12:34 PM, Johannes Hirte wrote:
  On Wed, 5 Feb 2014 10:49:15 -0500
  Josef Bacik jba...@fb.com wrote:
 
  Ok none of those make sense which makes me think it may be the
  ktime bits, instead of un-applying the whole patch could you
  just comment out the parts
 
  ktime_t start = ktime_get();
 
  and
 
  if (actual_count > 0) {
      u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
      u64 avg;
 
      /*
       * We weigh the current average higher than our current runtime
       * to avoid large swings in the average.
       */
      spin_lock(&delayed_refs->lock);
      avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
      avg = div64_u64(avg, 4);
      fs_info->avg_delayed_ref_runtime = avg;
      spin_unlock(&delayed_refs->lock);
  }
 
  in __btrfs_run_delayed_refs and see if that makes the problem
  stop? If it does will you try chris's for-linus branch to see
  if it still reproduces there?  Maybe some patch changed
  ktime_get() in -rc1 that is causing issues and we're just now
  exposing it. Thanks,
  With the ktime bits disabled, I wasn't able to reproduce the
  problem anymore. With Chris' for-linus branch it took longer but
  still appeared.
 
  Ok can you send your .config, maybe there's some weird time bug
  being exposed.  What kind of CPU do you have?  Thanks,
 
  Josef
  It's a Core i5-540M, dualcore + hyperthreading
  Ok while I'm doing this can you change
  btrfs_should_throttle_delayed_refs to _always_ return 1, still with
  all the ktime stuff commented out, and see if that causes the
  problem to happen?  Thanks,
  Yes it does. Same behavior as without ktime stuff commented out.
 
 Ok perfect, can you send me a btrfs fi df of that volume, and do you 
 have any snapshots or anything?  Thanks,

btrfs fi df /
Data, single: total=220.01GiB, used=210.85GiB
System, DUP: total=8.00MiB, used=32.00KiB
System, single: total=4.00MiB, used=0.00
Metadata, DUP: total=4.00GiB, used=2.93GiB
Metadata, single: total=8.00MiB, used=0.00

No snapshots but several subvolumes. / itself is a separate subvolume
and subvol 0 only contains the other subvolumes (5 at moment). qgroups
aren't enabled.

mount options are noatime,inode_cache, if this matters

regards,
  Johannes
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-04 Thread Josef Bacik


On 02/03/2014 05:53 PM, Johannes Hirte wrote:

On Mon, 3 Feb 2014 16:08:08 -0500
Josef Bacik jba...@fb.com wrote:


On 02/03/2014 01:28 PM, Johannes Hirte wrote:

On Thu, 23 Jan 2014 13:07:52 -0500
Josef Bacik jba...@fb.com wrote:


On one of our gluster clusters we noticed some pretty big lag
spikes.  This turned out to be because our transaction commit was
taking like 3 minutes to complete.  This is because we have like 30
gigs of metadata, so our global reserve would end up being the max
which is like 512 mb.  So our throttling code would allow a
ridiculous amount of delayed refs to build up and then they'd all
get run at transaction commit time, and for a cold mounted file
system that could take up to 3 minutes to run.  So fix the
throttling to be based on both the size of the global reserve and
how long it takes us to run delayed refs. This patch tracks the
time it takes to run delayed refs and then only allows 1 seconds
worth of outstanding delayed refs at a time.  This way it will
auto-tune itself from cold cache up to when everything is in
memory and it no longer has to go to disk.  This makes our
transaction commits take much less time to run. Thanks,

Signed-off-by: Josef Bacik jba...@fb.com

This one breaks my system. Shortly after boot the btrfs-freespace
thread goes up to 100% CPU usage and the system is nearly
unresponsive. I've seen it first with the full pull request for
3.14-rc1 and was able to track it down to this patch.

Could you turn on the softlockup timer and see if you can get a
backtrace of where it is stuck?  In the meantime I will go through
and see if I can pinpoint where it may be happening.  Thanks,

Josef

This is what I've got with

Hrm I was hoping that was going to be more helpful.  Can you get perf 
record -ag and then perf report while it's at full cpu and get the first 
3 or 4 things with their traces?  I'm going to try and reproduce today, 
is there anything special about your fs? Compression, large blocksizes, 
skinny metadata?  Thanks,


Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-03 Thread Johannes Hirte
On Thu, 23 Jan 2014 13:07:52 -0500
Josef Bacik jba...@fb.com wrote:

 On one of our gluster clusters we noticed some pretty big lag
 spikes.  This turned out to be because our transaction commit was
 taking like 3 minutes to complete.  This is because we have like 30
 gigs of metadata, so our global reserve would end up being the max
 which is like 512 mb.  So our throttling code would allow a
 ridiculous amount of delayed refs to build up and then they'd all get
 run at transaction commit time, and for a cold mounted file system
 that could take up to 3 minutes to run.  So fix the throttling to be
 based on both the size of the global reserve and how long it takes us
 to run delayed refs. This patch tracks the time it takes to run
 delayed refs and then only allows 1 seconds worth of outstanding
 delayed refs at a time.  This way it will auto-tune itself from cold
 cache up to when everything is in memory and it no longer has to go
 to disk.  This makes our transaction commits take much less time to
 run. Thanks,
 
 Signed-off-by: Josef Bacik jba...@fb.com

This one breaks my system. Shortly after boot the btrfs-freespace
thread goes up to 100% CPU usage and the system is nearly unresponsive.
I've seen it first with the full pull request for 3.14-rc1 and was able
to track it down to this patch.

regards,
  Johannes
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-03 Thread Josef Bacik


On 02/03/2014 01:28 PM, Johannes Hirte wrote:

On Thu, 23 Jan 2014 13:07:52 -0500
Josef Bacik jba...@fb.com wrote:


On one of our gluster clusters we noticed some pretty big lag
spikes.  This turned out to be because our transaction commit was
taking like 3 minutes to complete.  This is because we have like 30
gigs of metadata, so our global reserve would end up being the max
which is like 512 mb.  So our throttling code would allow a
ridiculous amount of delayed refs to build up and then they'd all get
run at transaction commit time, and for a cold mounted file system
that could take up to 3 minutes to run.  So fix the throttling to be
based on both the size of the global reserve and how long it takes us
to run delayed refs. This patch tracks the time it takes to run
delayed refs and then only allows 1 seconds worth of outstanding
delayed refs at a time.  This way it will auto-tune itself from cold
cache up to when everything is in memory and it no longer has to go
to disk.  This makes our transaction commits take much less time to
run. Thanks,

Signed-off-by: Josef Bacik jba...@fb.com

This one breaks my system. Shortly after boot the btrfs-freespace
thread goes up to 100% CPU usage and the system is nearly unresponsive.
I've seen it first with the full pull request for 3.14-rc1 and was able
to track it down to this patch.
Could you turn on the softlockup timer and see if you can get a 
backtrace of where it is stuck?  In the meantime I will go through and 
see if I can pinpoint where it may be happening.  Thanks,


Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: throttle delayed refs better

2014-02-03 Thread Johannes Hirte
On Mon, 3 Feb 2014 16:08:08 -0500
Josef Bacik jba...@fb.com wrote:

 
 On 02/03/2014 01:28 PM, Johannes Hirte wrote:
  On Thu, 23 Jan 2014 13:07:52 -0500
  Josef Bacik jba...@fb.com wrote:
 
  On one of our gluster clusters we noticed some pretty big lag
  spikes.  This turned out to be because our transaction commit was
  taking like 3 minutes to complete.  This is because we have like 30
  gigs of metadata, so our global reserve would end up being the max
  which is like 512 mb.  So our throttling code would allow a
  ridiculous amount of delayed refs to build up and then they'd all
  get run at transaction commit time, and for a cold mounted file
  system that could take up to 3 minutes to run.  So fix the
  throttling to be based on both the size of the global reserve and
  how long it takes us to run delayed refs. This patch tracks the
  time it takes to run delayed refs and then only allows 1 seconds
  worth of outstanding delayed refs at a time.  This way it will
  auto-tune itself from cold cache up to when everything is in
  memory and it no longer has to go to disk.  This makes our
  transaction commits take much less time to run. Thanks,
 
  Signed-off-by: Josef Bacik jba...@fb.com
  This one breaks my system. Shortly after boot the btrfs-freespace
  thread goes up to 100% CPU usage and the system is nearly
  unresponsive. I've seen it first with the full pull request for
  3.14-rc1 and was able to track it down to this patch.
 Could you turn on the softlockup timer and see if you can get a 
 backtrace of where it is stuck?  In the meantime I will go through
 and see if I can pinpoint where it may be happening.  Thanks,
 
 Josef

This is what I've got with

CONFIG_LOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0
# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120
# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
# CONFIG_PANIC_ON_OOPS is not set
CONFIG_PANIC_ON_OOPS_VALUE=0
CONFIG_PANIC_TIMEOUT=0
CONFIG_SCHED_DEBUG=y
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_PREEMPT=y

[  203.610758] perf samples too long (2513 > 2500), lowering kernel.perf_event_max_sample_rate to 5
[  360.625822] INFO: task btrfs-endio-wri:1075 blocked for more than 120 
seconds.
[  360.625826]   Not tainted 3.14.0-rc1 #19
[  360.625828] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  360.625829] btrfs-endio-wri D 880137c12d00 0  1075  2 0x
[  360.625833]  8800b6b10950 0002 00012d00 
8800b6b10950
[  360.625837]  8801325b3fd8 8800a2dcc000 8801325719e8 

[  360.625840]   880132571800 8800b635ba00 
81256192
[  360.625844] Call Trace:
[  360.625854]  [81256192] ? wait_current_trans.isra.19+0xbb/0xdf
[  360.625858]  [81089730] ? finish_wait+0x65/0x65
[  360.625860]  [8125740a] ? start_transaction+0x2f1/0x4e3
[  360.625864]  [81260b8a] ? btrfs_finish_ordered_io+0x44c/0x7b2
[  360.625869]  [81062dab] ? try_to_del_timer_sync+0x53/0x5e
[  360.625871]  [81062ddc] ? del_timer_sync+0x26/0x43
[  360.625875]  [815a9a7b] ? schedule_timeout+0xeb/0x104
[  360.625877]  [81061f10] ? rcu_read_unlock_sched_notrace+0x11/0x11
[  360.625882]  [8127bd1c] ? worker_loop+0x162/0x4c3
[  360.625884]  [8127bbba] ? btrfs_queue_worker+0x275/0x275
[  360.625888]  [81072f8b] ? kthread+0xa3/0xab
[  360.625893]  [810da9c1] ? trace_preempt_on+0xd/0x2a
[  360.625895]  [8107] ? freeze_workqueues_begin+0x8/0x11e
[  360.625897]  [81072ee8] ? __kthread_parkme+0x5a/0x5a
[  360.625901]  [815ad8ec] ? ret_from_fork+0x7c/0xb0
[  360.625903]  [81072ee8] ? __kthread_parkme+0x5a/0x5a
[  360.625906] INFO: task btrfs-transacti:1084 blocked for more than 120 
seconds.
[  360.625908]   Not tainted 3.14.0-rc1 #19
[  360.625909] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  360.625910] btrfs-transacti D 880137c52d00 0  1084  2 0x
[  360.625912]  880132428950 0002 00012d00 
880132428950
[  360.625915]  8800b5a35fd8 8801331a5a70 8801331a5ae8 

[  360.625918]  8800aba981b8 00015000 0001 
8126b986
[  360.625921] Call Trace:
[  360.625925]  [8126b986] ? btrfs_start_ordered_extent+0x91/0xdf
[  360.625928]  [81089730] ? finish_wait+0x65/0x65
[  360.625931]  [8126bbff] ? btrfs_wait_ordered_range+0xab/0x10a
[  360.625934]  [8128ac45] ? __btrfs_write_out_cache+0x43c/0x67f
[  360.625939]  [8112d81b] ? kmem_cache_free+0x66/0x10d
[  360.625942]  [8125970a] ? btrfs_update_inode_item+0xb9/0xcd
[  360.625944]  

Re: [PATCH] Btrfs: throttle delayed refs better

2014-01-24 Thread Josef Bacik


On 01/24/2014 02:34 AM, Liu Bo wrote:

On Thu, Jan 23, 2014 at 01:07:52PM -0500, Josef Bacik wrote:

On one of our gluster clusters we noticed some pretty big lag spikes.  This
turned out to be because our transaction commit was taking like 3 minutes to
complete.  This is because we have like 30 gigs of metadata, so our global
reserve would end up being the max which is like 512 mb.  So our throttling code
would allow a ridiculous amount of delayed refs to build up and then they'd all
get run at transaction commit time, and for a cold mounted file system that
could take up to 3 minutes to run.  So fix the throttling to be based on both
the size of the global reserve and how long it takes us to run delayed refs.
This patch tracks the time it takes to run delayed refs and then only allows 1
seconds worth of outstanding delayed refs at a time.  This way it will auto-tune
itself from cold cache up to when everything is in memory and it no longer has
to go to disk.  This makes our transaction commits take much less time to run.
Thanks,

Which version of btrfs is the patch made for?

I checked the code and it doesn't seem to be btrfs-next, either...we don't
have a __btrfs_run_delayed_refs().


It depends on the patch I sent before where I move delayed refs onto a 
delayed ref head rb tree.  Thanks,


Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: throttle delayed refs better

2014-01-23 Thread Josef Bacik
On one of our gluster clusters we noticed some pretty big lag spikes.  This
turned out to be because our transaction commit was taking like 3 minutes to
complete.  This is because we have like 30 gigs of metadata, so our global
reserve would end up being the max which is like 512 mb.  So our throttling code
would allow a ridiculous amount of delayed refs to build up and then they'd all
get run at transaction commit time, and for a cold mounted file system that
could take up to 3 minutes to run.  So fix the throttling to be based on both
the size of the global reserve and how long it takes us to run delayed refs.
This patch tracks the time it takes to run delayed refs and then only allows 1
seconds worth of outstanding delayed refs at a time.  This way it will auto-tune
itself from cold cache up to when everything is in memory and it no longer has
to go to disk.  This makes our transaction commits take much less time to run.
Thanks,

Signed-off-by: Josef Bacik jba...@fb.com
---
 fs/btrfs/ctree.h   |  3 +++
 fs/btrfs/disk-io.c |  2 +-
 fs/btrfs/extent-tree.c | 41 -
 fs/btrfs/transaction.c |  4 ++--
 4 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3cebb4a..ca6bcc3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1360,6 +1360,7 @@ struct btrfs_fs_info {
 
u64 generation;
u64 last_trans_committed;
+   u64 avg_delayed_ref_runtime;
 
/*
 * this is updated to the current trans every time a full commit
@@ -3172,6 +3173,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct 
btrfs_root *root,
 
 int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
   struct btrfs_root *root);
+int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
+  struct btrfs_root *root);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
   struct btrfs_root *root, unsigned long count);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ed23127..f0e7bbe 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2185,7 +2185,7 @@ int open_ctree(struct super_block *sb,
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
-
+   fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
spin_lock_init(&fs_info->reada_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c77156c..b532259 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2322,8 +2322,10 @@ static noinline int __btrfs_run_delayed_refs(struct 
btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *locked_ref = NULL;
struct btrfs_delayed_extent_op *extent_op;
struct btrfs_fs_info *fs_info = root->fs_info;
+   ktime_t start = ktime_get();
int ret;
unsigned long count = 0;
+   unsigned long actual_count = 0;
int must_insert_reserved = 0;
 
delayed_refs = &trans->transaction->delayed_refs;
@@ -2452,6 +2454,7 @@ static noinline int __btrfs_run_delayed_refs(struct 
btrfs_trans_handle *trans,
 &delayed_refs->href_root);
spin_unlock(&delayed_refs->lock);
} else {
+   actual_count++;
ref->in_tree = 0;
rb_erase(&ref->rb_node, &locked_ref->ref_root);
}
@@ -2502,6 +2505,26 @@ static noinline int __btrfs_run_delayed_refs(struct 
btrfs_trans_handle *trans,
count++;
cond_resched();
}
+
+   /*
+    * We don't want to include ref heads since we can have empty ref heads
+    * and those will drastically skew our runtime down since we just do
+    * accounting, no actual extent tree updates.
+    */
+   if (actual_count > 0) {
+       u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
+       u64 avg;
+
+       /*
+        * We weigh the current average higher than our current runtime
+        * to avoid large swings in the average.
+        */
+       spin_lock(&delayed_refs->lock);
+       avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
+       avg = div64_u64(avg, 4);
+       fs_info->avg_delayed_ref_runtime = avg;
+       spin_unlock(&delayed_refs->lock);
+   }
return 0;
 }
 
@@ -2600,7 +2623,7 @@ static inline u64 heads_to_leaves(struct btrfs_root 
*root, u64 heads)
return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
 }
 
-int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
+int 

Re: [PATCH] Btrfs: throttle delayed refs better

2014-01-23 Thread Liu Bo
On Thu, Jan 23, 2014 at 01:07:52PM -0500, Josef Bacik wrote:
 On one of our gluster clusters we noticed some pretty big lag spikes.  This
 turned out to be because our transaction commit was taking like 3 minutes to
 complete.  This is because we have like 30 gigs of metadata, so our global
 reserve would end up being the max which is like 512 mb.  So our throttling 
 code
 would allow a ridiculous amount of delayed refs to build up and then they'd 
 all
 get run at transaction commit time, and for a cold mounted file system that
 could take up to 3 minutes to run.  So fix the throttling to be based on both
 the size of the global reserve and how long it takes us to run delayed refs.
 This patch tracks the time it takes to run delayed refs and then only allows 1
 seconds worth of outstanding delayed refs at a time.  This way it will 
 auto-tune
 itself from cold cache up to when everything is in memory and it no longer has
 to go to disk.  This makes our transaction commits take much less time to run.
 Thanks,

Which version of btrfs is the patch made for?

I checked the code and it doesn't seem to be btrfs-next, either...we don't
have a __btrfs_run_delayed_refs(). 

-liubo

 
 Signed-off-by: Josef Bacik jba...@fb.com
 ---
  fs/btrfs/ctree.h   |  3 +++
  fs/btrfs/disk-io.c |  2 +-
  fs/btrfs/extent-tree.c | 41 -
  fs/btrfs/transaction.c |  4 ++--
  4 files changed, 46 insertions(+), 4 deletions(-)
 
 diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
 index 3cebb4a..ca6bcc3 100644
 --- a/fs/btrfs/ctree.h
 +++ b/fs/btrfs/ctree.h
 @@ -1360,6 +1360,7 @@ struct btrfs_fs_info {
  
   u64 generation;
   u64 last_trans_committed;
 + u64 avg_delayed_ref_runtime;
  
   /*
* this is updated to the current trans every time a full commit
 @@ -3172,6 +3173,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct 
 btrfs_root *root,
  
  int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
  struct btrfs_root *root);
 +int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
 +struct btrfs_root *root);
  void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
  int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
  struct btrfs_root *root, unsigned long count);
 diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
 index ed23127..f0e7bbe 100644
 --- a/fs/btrfs/disk-io.c
 +++ b/fs/btrfs/disk-io.c
 @@ -2185,7 +2185,7 @@ int open_ctree(struct super_block *sb,
   fs_info->free_chunk_space = 0;
   fs_info->tree_mod_log = RB_ROOT;
   fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
 -
 + fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
   /* readahead state */
   INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
   spin_lock_init(&fs_info->reada_lock);
 diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
 index c77156c..b532259 100644
 --- a/fs/btrfs/extent-tree.c
 +++ b/fs/btrfs/extent-tree.c
 @@ -2322,8 +2322,10 @@ static noinline int __btrfs_run_delayed_refs(struct 
 btrfs_trans_handle *trans,
   struct btrfs_delayed_ref_head *locked_ref = NULL;
   struct btrfs_delayed_extent_op *extent_op;
   struct btrfs_fs_info *fs_info = root->fs_info;
 + ktime_t start = ktime_get();
   int ret;
   unsigned long count = 0;
 + unsigned long actual_count = 0;
   int must_insert_reserved = 0;
  
   delayed_refs = &trans->transaction->delayed_refs;
 @@ -2452,6 +2454,7 @@ static noinline int __btrfs_run_delayed_refs(struct 
 btrfs_trans_handle *trans,
&delayed_refs->href_root);
   spin_unlock(&delayed_refs->lock);
   } else {
 + actual_count++;
   ref->in_tree = 0;
   rb_erase(&ref->rb_node, &locked_ref->ref_root);
   }
 @@ -2502,6 +2505,26 @@ static noinline int __btrfs_run_delayed_refs(struct 
 btrfs_trans_handle *trans,
   count++;
   cond_resched();
   }
 +
 + /*
 +  * We don't want to include ref heads since we can have empty ref heads
 +  * and those will drastically skew our runtime down since we just do
 +  * accounting, no actual extent tree updates.
 +  */
 + if (actual_count > 0) {
 +     u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
 +     u64 avg;
 +
 +     /*
 +      * We weigh the current average higher than our current runtime
 +      * to avoid large swings in the average.
 +      */
 +     spin_lock(&delayed_refs->lock);
 +     avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
 +     avg = div64_u64(avg, 4);
 +     fs_info->avg_delayed_ref_runtime = avg;
 +     spin_unlock(&delayed_refs->lock);
 + }
   return 0;
  }
  
 @@