[ 06/52] bcache: Fix a writeback performance regression

2013-10-02 Thread Greg Kroah-Hartman
3.10-stable review patch.  If anyone has any objections, please let me know.

--

From: Kent Overstreet 

commit c2a4f3183a1248f615a695fbd8905da55ad11bba upstream.

Background writeback works by scanning the btree for dirty data and
adding those keys into a fixed size buffer, then for each dirty key in
the keybuf writing it to the backing device.

When read_dirty() finishes and it's time to scan for more dirty data, we
need to wait for the outstanding writeback IO to finish - they still
take up slots in the keybuf (so that foreground writes can check for
them to avoid races) - without that wait, we'll continually rescan when
we'll be able to add at most a key or two to the keybuf, and that takes
locks that starves foreground IO.  Doh.

Signed-off-by: Kent Overstreet 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/md/bcache/bcache.h|7 ++
 drivers/md/bcache/util.c  |   11 +-
 drivers/md/bcache/util.h  |   12 ---
 drivers/md/bcache/writeback.c |   43 --
 4 files changed, 43 insertions(+), 30 deletions(-)

--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -499,7 +499,7 @@ struct cached_dev {
 */
atomic_thas_dirty;
 
-   struct ratelimitwriteback_rate;
+   struct bch_ratelimitwriteback_rate;
struct delayed_work writeback_rate_update;
 
/*
@@ -508,10 +508,9 @@ struct cached_dev {
 */
sector_tlast_read;
 
-   /* Number of writeback bios in flight */
-   atomic_tin_flight;
+   /* Limit number of writeback bios in flight */
+   struct semaphorein_flight;
struct closure_with_timer writeback;
-   struct closure_waitlist writeback_wait;
 
struct keybuf   writeback_keys;
 
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_s
stats->last = now ?: 1;
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
+/**
+ * bch_next_delay() - increment @d by the amount of work done, and return how
+ * long to delay until the next time to do some work.
+ *
+ * @d - the struct bch_ratelimit to update
+ * @done - the amount of work done, in arbitrary units
+ *
+ * Returns the amount of time to delay by, in jiffies
+ */
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
 {
uint64_t now = local_clock();
 
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -452,17 +452,23 @@ read_attribute(name ## _last_ ## frequen
(ewma) >> factor;   \
 })
 
-struct ratelimit {
+struct bch_ratelimit {
+   /* Next time we want to do some work, in nanoseconds */
uint64_tnext;
+
+   /*
+* Rate at which we want to do work, in units per nanosecond
+* The units here correspond to the units passed to bch_next_delay()
+*/
unsignedrate;
 };
 
-static inline void ratelimit_reset(struct ratelimit *d)
+static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
 {
d->next = local_clock();
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
 
 #define __DIV_SAFE(n, d, zero) \
 ({ \
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -91,11 +91,15 @@ static void update_writeback_rate(struct
 
 static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
 {
+   uint64_t ret;
+
if (atomic_read(&dc->disk.detaching) ||
!dc->writeback_percent)
return 0;
 
-   return bch_next_delay(&dc->writeback_rate, sectors * 1000ULL);
+   ret = bch_next_delay(&dc->writeback_rate, sectors * 1000ULL);
+
+   return min_t(uint64_t, ret, HZ);
 }
 
 /* Background writeback */
@@ -165,7 +169,7 @@ static void refill_dirty(struct closure
 
up_write(&dc->writeback_lock);
 
-   ratelimit_reset(&dc->writeback_rate);
+   bch_ratelimit_reset(&dc->writeback_rate);
 
/* Punt to workqueue only so we don't recurse and blow the stack */
continue_at(cl, read_dirty, dirty_wq);
@@ -246,9 +250,7 @@ static void write_dirty_finish(struct cl
}
 
bch_keybuf_del(&dc->writeback_keys, w);
-   atomic_dec_bug(&dc->in_flight);
-
-   closure_wake_up(&dc->writeback_wait);
+   up(&dc->in_flight);
 
closure_return_with_destructor(cl, dirty_io_destructor);
 }
@@ -278,7 +280,7 @@ static void write_dirty(struct closure *
trace_bcache_write_dirty(&io->bio);
closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-   continue_at(cl, write_dirty_finish, dirty_wq);
+   continue_at(cl, write

[ 06/57] bcache: Fix a writeback performance regression

2013-10-02 Thread Greg Kroah-Hartman
3.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Kent Overstreet 

commit c2a4f3183a1248f615a695fbd8905da55ad11bba upstream.

Background writeback works by scanning the btree for dirty data and
adding those keys into a fixed size buffer, then for each dirty key in
the keybuf writing it to the backing device.

When read_dirty() finishes and it's time to scan for more dirty data, we
need to wait for the outstanding writeback IO to finish - they still
take up slots in the keybuf (so that foreground writes can check for
them to avoid races) - without that wait, we'll continually rescan when
we'll be able to add at most a key or two to the keybuf, and that takes
locks that starves foreground IO.  Doh.

Signed-off-by: Kent Overstreet 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/md/bcache/bcache.h|7 ++
 drivers/md/bcache/util.c  |   11 +-
 drivers/md/bcache/util.h  |   12 ---
 drivers/md/bcache/writeback.c |   43 --
 4 files changed, 43 insertions(+), 30 deletions(-)

--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -498,7 +498,7 @@ struct cached_dev {
 */
atomic_thas_dirty;
 
-   struct ratelimitwriteback_rate;
+   struct bch_ratelimitwriteback_rate;
struct delayed_work writeback_rate_update;
 
/*
@@ -507,10 +507,9 @@ struct cached_dev {
 */
sector_tlast_read;
 
-   /* Number of writeback bios in flight */
-   atomic_tin_flight;
+   /* Limit number of writeback bios in flight */
+   struct semaphorein_flight;
struct closure_with_timer writeback;
-   struct closure_waitlist writeback_wait;
 
struct keybuf   writeback_keys;
 
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_s
stats->last = now ?: 1;
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
+/**
+ * bch_next_delay() - increment @d by the amount of work done, and return how
+ * long to delay until the next time to do some work.
+ *
+ * @d - the struct bch_ratelimit to update
+ * @done - the amount of work done, in arbitrary units
+ *
+ * Returns the amount of time to delay by, in jiffies
+ */
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
 {
uint64_t now = local_clock();
 
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequen
(ewma) >> factor;   \
 })
 
-struct ratelimit {
+struct bch_ratelimit {
+   /* Next time we want to do some work, in nanoseconds */
uint64_tnext;
+
+   /*
+* Rate at which we want to do work, in units per nanosecond
+* The units here correspond to the units passed to bch_next_delay()
+*/
unsignedrate;
 };
 
-static inline void ratelimit_reset(struct ratelimit *d)
+static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
 {
d->next = local_clock();
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
 
 #define __DIV_SAFE(n, d, zero) \
 ({ \
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct
 
 static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
 {
+   uint64_t ret;
+
if (atomic_read(&dc->disk.detaching) ||
!dc->writeback_percent)
return 0;
 
-   return bch_next_delay(&dc->writeback_rate, sectors * 1000ULL);
+   ret = bch_next_delay(&dc->writeback_rate, sectors * 1000ULL);
+
+   return min_t(uint64_t, ret, HZ);
 }
 
 /* Background writeback */
@@ -208,7 +212,7 @@ normal_refill:
 
up_write(&dc->writeback_lock);
 
-   ratelimit_reset(&dc->writeback_rate);
+   bch_ratelimit_reset(&dc->writeback_rate);
 
/* Punt to workqueue only so we don't recurse and blow the stack */
continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct cl
}
 
bch_keybuf_del(&dc->writeback_keys, w);
-   atomic_dec_bug(&dc->in_flight);
-
-   closure_wake_up(&dc->writeback_wait);
+   up(&dc->in_flight);
 
closure_return_with_destructor(cl, dirty_io_destructor);
 }
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *
 
closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-   continue_at(cl, write_dirty_finish, dirty_wq);
+   continue_at(cl, write_dirty_finish, system_wq);
 }
 
 static void read_dirty_endio(struc

[PATCH 06/11] bcache: Fix a writeback performance regression

2013-09-23 Thread Kent Overstreet
Background writeback works by scanning the btree for dirty data and
adding those keys into a fixed size buffer, then for each dirty key in
the keybuf writing it to the backing device.

When read_dirty() finishes and it's time to scan for more dirty data, we
need to wait for the outstanding writeback IO to finish - they still
take up slots in the keybuf (so that foreground writes can check for
them to avoid races) - without that wait, we'll continually rescan when
we'll be able to add at most a key or two to the keybuf, and that takes
locks that starves foreground IO. Doh.

Signed-off-by: Kent Overstreet 
Cc: linux-stable  # >= v3.10
---
 drivers/md/bcache/bcache.h|  7 +++
 drivers/md/bcache/util.c  | 11 ++-
 drivers/md/bcache/util.h  | 12 +---
 drivers/md/bcache/writeback.c | 43 +--
 4 files changed, 43 insertions(+), 30 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b39f6f0..0f12382 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -498,7 +498,7 @@ struct cached_dev {
 */
atomic_thas_dirty;
 
-   struct ratelimitwriteback_rate;
+   struct bch_ratelimitwriteback_rate;
struct delayed_work writeback_rate_update;
 
/*
@@ -507,10 +507,9 @@ struct cached_dev {
 */
sector_tlast_read;
 
-   /* Number of writeback bios in flight */
-   atomic_tin_flight;
+   /* Limit number of writeback bios in flight */
+   struct semaphorein_flight;
struct closure_with_timer writeback;
-   struct closure_waitlist writeback_wait;
 
struct keybuf   writeback_keys;
 
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 98eb811..420dad5 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, 
uint64_t start_time)
stats->last = now ?: 1;
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
+/**
+ * bch_next_delay() - increment @d by the amount of work done, and return how
+ * long to delay until the next time to do some work.
+ *
+ * @d - the struct bch_ratelimit to update
+ * @done - the amount of work done, in arbitrary units
+ *
+ * Returns the amount of time to delay by, in jiffies
+ */
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
 {
uint64_t now = local_clock();
 
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 1ae2a73..ea345c6 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units)
(ewma) >> factor;   \
 })
 
-struct ratelimit {
+struct bch_ratelimit {
+   /* Next time we want to do some work, in nanoseconds */
uint64_tnext;
+
+   /*
+* Rate at which we want to do work, in units per nanosecond
+* The units here correspond to the units passed to bch_next_delay()
+*/
unsignedrate;
 };
 
-static inline void ratelimit_reset(struct ratelimit *d)
+static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
 {
d->next = local_clock();
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
 
 #define __DIV_SAFE(n, d, zero) \
 ({ \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 22cbff5..27ac519 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)
 
 static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
 {
+   uint64_t ret;
+
if (atomic_read(&dc->disk.detaching) ||
!dc->writeback_percent)
return 0;
 
-   return bch_next_delay(&dc->writeback_rate, sectors * 1000ULL);
+   ret = bch_next_delay(&dc->writeback_rate, sectors * 1000ULL);
+
+   return min_t(uint64_t, ret, HZ);
 }
 
 /* Background writeback */
@@ -208,7 +212,7 @@ normal_refill:
 
up_write(&dc->writeback_lock);
 
-   ratelimit_reset(&dc->writeback_rate);
+   bch_ratelimit_reset(&dc->writeback_rate);
 
/* Punt to workqueue only so we don't recurse and blow the stack */
continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
}
 
bch_keybuf_del(&dc->writeback_keys, w);
-   atomic_dec_bug(&dc->in_flight);
-
-   closure_wake_up(&dc->writeback_wait);
+   up(&dc->in_flight);
 
closure_return_with_destructor(cl, dirty_io_destructor);
 }
@@ -349,7 +351,7 @@ static void

Re: bcache: Fix a writeback performance regression

2013-08-29 Thread Stefan Priebe

sorry seems i see something similiar:
http://pastebin.com/raw.php?i=ZqgLf9gp

Stefan

Am 28.08.2013 22:15, schrieb Stefan Priebe:

sorry but that's completely wrong.

please use branch bcache-for-3.10

http://evilpiepirate.org/git/linux-bcache.git/log/?h=bcache-for-3.10

Stefan

Am 28.08.2013 22:12, schrieb kernel neophyte:

On Wed, Aug 28, 2013 at 12:20 PM, Stefan Priebe
 wrote:

Am 28.08.2013 20:47, schrieb kernel neophyte:


On Wed, Aug 28, 2013 at 11:38 AM, Stefan Priebe - Profihost AG
 wrote:


I don't had one for a few days. Which kernel so you use?



3.10 kernel with all of kent's stable patches and perf patches..



Which exact 3.10 version? Which patches on top?


3.10.0-rc7
with following patches:
bcache: Fix a dumb CPU spinning bug in writeback
bcache: Fix a flush/fua performance bug
bcache: Fix a writeback performance regression
bcache: FUA fixes

I have also tried bcache perf branch i.e.
(http://evilpiepirate.org/git/linux-bcache.git/log/?h=bcache-perf)





and all defaults except for:

echo writeback > /sys/block/bcache0/bcache/cache_mode
echo 0 > /sys/block/bcache0/bcache/sequential_cutoff
echo 0 >
/sys/fs/bcache/1e26e5ca-5cb9-4c5a-ad7e-3c787524a9dc/congested_read_threshold_us

echo 0 >
/sys/fs/bcache/1e26e5ca-5cb9-4c5a-ad7e-3c787524a9dc/congested_write_threshold_us


-Neo



Stefan

This mail was sent with my iPhone.

Am 28.08.2013 um 20:12 schrieb kernel neophyte
:


On Tue, Aug 27, 2013 at 11:05 PM, Kent Overstreet 
wrote:


On Tue, Aug 27, 2013 at 08:37:54PM -0700, kernel neophyte wrote:


Hi Kent,

I am still seeing deadlock:



Seeing blk_throtl_bio (what asshole misspells words in their
function
names, so when you go to grep for them you grep for the wrong
thing?)
in
the backtrace - is that what it takes to hit it?



I don't think so,
I had another deadlock:

[ 1179.193990] bio: create slab  at 2
[ 1179.194491] bcache: run_cache_set() invalidating existing data
[ 1179.212308] bcache: register_cache() registered cache device rxd1
[ 1203.846220] bcache: register_bdev() registered backing device sda3
[ 1227.780088] bcache: bch_cached_dev_attach() Caching sda3 as
bcache0
on set a079dda3-27fd-4b1c-a616-d46110bfae6c
[ 2162.497950] INFO: task kworker/0:0:4 blocked for more than 120
seconds.
[ 2162.497961] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.497967] kworker/0:0 D  0 4  2
0x
[ 2162.497982] Workqueue: events write_dirty_finish
[ 2162.497986]  882fa6ac9b30 0046 81511b8c
815137c0
[ 2162.497992]  882fa6a84cb0 882fa6ac9fd8 882fa6ac9fd8
882fa6ac9fd8
[ 2162.497996]  882f78139990 882fa6a84cb0 815117a1
882fa6a84cb0
[ 2162.498001] Call Trace:
[ 2162.498010]  [] ?
bch_btree_map_nodes_recurse+0x6c/0x170
[ 2162.498015]  [] ?
bch_btree_insert_node+0x2f0/0x2f0
[ 2162.498019]  [] ? bch_btree_node_get+0x71/0x280
[ 2162.498028]  [] schedule+0x29/0x70
[ 2162.498034]  [] rwsem_down_read_failed+0x9d/0xe5
[ 2162.498043]  []
call_rwsem_down_read_failed+0x14/0x30
[ 2162.498048]  [] ? down_read+0x24/0x2b
[ 2162.498052]  [] __bch_btree_map_nodes+0xe5/0x1c0
[ 2162.498057]  [] ?
bch_btree_insert_node+0x2f0/0x2f0
[ 2162.498066]  [] ? update_curr+0x141/0x1f0
[ 2162.498072]  [] ? dirty_io_destructor+0xe/0x10
[ 2162.498076]  [] bch_btree_insert+0xb4/0x120
[ 2162.498082]  [] write_dirty_finish+0x1e3/0x270
[ 2162.498087]  [] ? pick_next_task_fair+0x6b/0x150
[ 2162.498093]  [] ? dequeue_task+0x93/0xb0
[ 2162.498100]  [] process_one_work+0x174/0x490
[ 2162.498105]  [] worker_thread+0x11b/0x370
[ 2162.498110]  [] ?
manage_workers.isra.23+0x2d0/0x2d0
[ 2162.498115]  [] kthread+0xc0/0xd0
[ 2162.498120]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498127]  [] ret_from_fork+0x7c/0xb0
[ 2162.498131]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498145] INFO: task bcache_allocato:1766 blocked for more than
120 seconds.
[ 2162.498149] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.498154] bcache_allocato D 0001 0  1766  2
0x
[ 2162.498158]  8822630f3d88 0046 8822630f3da8
810808ad
[ 2162.498163]  882f891de640 8822630f3fd8 8822630f3fd8
8822630f3fd8
[ 2162.498167]  882f81a91990 882f891de640 882f88b33320
8801fad20d98
[ 2162.498172] Call Trace:
[ 2162.498177]  [] ? dequeue_task_fair+0x2cd/0x530
[ 2162.498182]  [] schedule+0x29/0x70
[ 2162.498187]  []
schedule_preempt_disabled+0xe/0x10
[ 2162.498192]  []
__mutex_lock_slowpath+0x112/0x1b0
[ 2162.498197]  [] mutex_lock+0x2a/0x50
[ 2162.498202]  [] bch_allocator_thread+0x10f/0xe20
[ 2162.498207]  [] ?
bch_bucket_add_unused+0xe0/0xe0
[ 2162.498211]  [] kthread+0xc0/0xd0
[ 2162.498215]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498220]  [] ret_from_fork+0x7c/0xb0
[ 2162.498224]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498228] INFO: task bcache_writebac:1771 blocked

Re: bcache: Fix a writeback performance regression

2013-08-28 Thread Stefan Priebe

sorry but that's completely wrong.

please use branch bcache-for-3.10

http://evilpiepirate.org/git/linux-bcache.git/log/?h=bcache-for-3.10

Stefan

Am 28.08.2013 22:12, schrieb kernel neophyte:

On Wed, Aug 28, 2013 at 12:20 PM, Stefan Priebe  wrote:

Am 28.08.2013 20:47, schrieb kernel neophyte:


On Wed, Aug 28, 2013 at 11:38 AM, Stefan Priebe - Profihost AG
 wrote:


I don't had one for a few days. Which kernel so you use?



3.10 kernel with all of kent's stable patches and perf patches..



Which exact 3.10 version? Which patches on top?


3.10.0-rc7
with following patches:
bcache: Fix a dumb CPU spinning bug in writeback
bcache: Fix a flush/fua performance bug
bcache: Fix a writeback performance regression
bcache: FUA fixes

I have also tried bcache perf branch i.e.
(http://evilpiepirate.org/git/linux-bcache.git/log/?h=bcache-perf)





and all defaults except for:

echo writeback > /sys/block/bcache0/bcache/cache_mode
echo 0 > /sys/block/bcache0/bcache/sequential_cutoff
echo 0 >
/sys/fs/bcache/1e26e5ca-5cb9-4c5a-ad7e-3c787524a9dc/congested_read_threshold_us
echo 0 >
/sys/fs/bcache/1e26e5ca-5cb9-4c5a-ad7e-3c787524a9dc/congested_write_threshold_us

-Neo



Stefan

This mail was sent with my iPhone.

Am 28.08.2013 um 20:12 schrieb kernel neophyte
:


On Tue, Aug 27, 2013 at 11:05 PM, Kent Overstreet 
wrote:


On Tue, Aug 27, 2013 at 08:37:54PM -0700, kernel neophyte wrote:


Hi Kent,

I am still seeing deadlock:



Seeing blk_throtl_bio (what asshole misspells words in their function
names, so when you go to grep for them you grep for the wrong thing?)
in
the backtrace - is that what it takes to hit it?



I don't think so,
I had another deadlock:

[ 1179.193990] bio: create slab  at 2
[ 1179.194491] bcache: run_cache_set() invalidating existing data
[ 1179.212308] bcache: register_cache() registered cache device rxd1
[ 1203.846220] bcache: register_bdev() registered backing device sda3
[ 1227.780088] bcache: bch_cached_dev_attach() Caching sda3 as bcache0
on set a079dda3-27fd-4b1c-a616-d46110bfae6c
[ 2162.497950] INFO: task kworker/0:0:4 blocked for more than 120
seconds.
[ 2162.497961] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.497967] kworker/0:0 D  0 4  2
0x
[ 2162.497982] Workqueue: events write_dirty_finish
[ 2162.497986]  882fa6ac9b30 0046 81511b8c
815137c0
[ 2162.497992]  882fa6a84cb0 882fa6ac9fd8 882fa6ac9fd8
882fa6ac9fd8
[ 2162.497996]  882f78139990 882fa6a84cb0 815117a1
882fa6a84cb0
[ 2162.498001] Call Trace:
[ 2162.498010]  [] ?
bch_btree_map_nodes_recurse+0x6c/0x170
[ 2162.498015]  [] ? bch_btree_insert_node+0x2f0/0x2f0
[ 2162.498019]  [] ? bch_btree_node_get+0x71/0x280
[ 2162.498028]  [] schedule+0x29/0x70
[ 2162.498034]  [] rwsem_down_read_failed+0x9d/0xe5
[ 2162.498043]  []
call_rwsem_down_read_failed+0x14/0x30
[ 2162.498048]  [] ? down_read+0x24/0x2b
[ 2162.498052]  [] __bch_btree_map_nodes+0xe5/0x1c0
[ 2162.498057]  [] ? bch_btree_insert_node+0x2f0/0x2f0
[ 2162.498066]  [] ? update_curr+0x141/0x1f0
[ 2162.498072]  [] ? dirty_io_destructor+0xe/0x10
[ 2162.498076]  [] bch_btree_insert+0xb4/0x120
[ 2162.498082]  [] write_dirty_finish+0x1e3/0x270
[ 2162.498087]  [] ? pick_next_task_fair+0x6b/0x150
[ 2162.498093]  [] ? dequeue_task+0x93/0xb0
[ 2162.498100]  [] process_one_work+0x174/0x490
[ 2162.498105]  [] worker_thread+0x11b/0x370
[ 2162.498110]  [] ?
manage_workers.isra.23+0x2d0/0x2d0
[ 2162.498115]  [] kthread+0xc0/0xd0
[ 2162.498120]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498127]  [] ret_from_fork+0x7c/0xb0
[ 2162.498131]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498145] INFO: task bcache_allocato:1766 blocked for more than
120 seconds.
[ 2162.498149] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.498154] bcache_allocato D 0001 0  1766  2
0x
[ 2162.498158]  8822630f3d88 0046 8822630f3da8
810808ad
[ 2162.498163]  882f891de640 8822630f3fd8 8822630f3fd8
8822630f3fd8
[ 2162.498167]  882f81a91990 882f891de640 882f88b33320
8801fad20d98
[ 2162.498172] Call Trace:
[ 2162.498177]  [] ? dequeue_task_fair+0x2cd/0x530
[ 2162.498182]  [] schedule+0x29/0x70
[ 2162.498187]  [] schedule_preempt_disabled+0xe/0x10
[ 2162.498192]  [] __mutex_lock_slowpath+0x112/0x1b0
[ 2162.498197]  [] mutex_lock+0x2a/0x50
[ 2162.498202]  [] bch_allocator_thread+0x10f/0xe20
[ 2162.498207]  [] ? bch_bucket_add_unused+0xe0/0xe0
[ 2162.498211]  [] kthread+0xc0/0xd0
[ 2162.498215]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498220]  [] ret_from_fork+0x7c/0xb0
[ 2162.498224]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498228] INFO: task bcache_writebac:1771 blocked for more than
120 seconds.
[ 2162.498233] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this messag

Re: bcache: Fix a writeback performance regression

2013-08-28 Thread kernel neophyte
On Wed, Aug 28, 2013 at 12:20 PM, Stefan Priebe  wrote:
> Am 28.08.2013 20:47, schrieb kernel neophyte:
>
>> On Wed, Aug 28, 2013 at 11:38 AM, Stefan Priebe - Profihost AG
>>  wrote:
>>>
>>> I don't had one for a few days. Which kernel so you use?
>>
>>
>> 3.10 kernel with all of kent's stable patches and perf patches..
>
>
> Which exact 3.10 version? Which patches on top?

3.10.0-rc7
with following patches:
bcache: Fix a dumb CPU spinning bug in writeback
bcache: Fix a flush/fua performance bug
bcache: Fix a writeback performance regression
bcache: FUA fixes

I have also tried bcache perf branch i.e.
(http://evilpiepirate.org/git/linux-bcache.git/log/?h=bcache-perf)

>
>
>> and all defaults except for:
>>
>> echo writeback > /sys/block/bcache0/bcache/cache_mode
>> echo 0 > /sys/block/bcache0/bcache/sequential_cutoff
>> echo 0 >
>> /sys/fs/bcache/1e26e5ca-5cb9-4c5a-ad7e-3c787524a9dc/congested_read_threshold_us
>> echo 0 >
>> /sys/fs/bcache/1e26e5ca-5cb9-4c5a-ad7e-3c787524a9dc/congested_write_threshold_us
>>
>> -Neo
>>
>>>
>>> Stefan
>>>
>>> This mail was sent with my iPhone.
>>>
>>> Am 28.08.2013 um 20:12 schrieb kernel neophyte
>>> :
>>>
>>>> On Tue, Aug 27, 2013 at 11:05 PM, Kent Overstreet 
>>>> wrote:
>>>>>
>>>>> On Tue, Aug 27, 2013 at 08:37:54PM -0700, kernel neophyte wrote:
>>>>>>
>>>>>> Hi Kent,
>>>>>>
>>>>>> I am still seeing deadlock:
>>>>>
>>>>>
>>>>> Seeing blk_throtl_bio (what asshole misspells words in their function
>>>>> names, so when you go to grep for them you grep for the wrong thing?)
>>>>> in
>>>>> the backtrace - is that what it takes to hit it?
>>>>
>>>>
>>>> I don't think so,
>>>> I had another deadlock:
>>>>
>>>> [ 1179.193990] bio: create slab  at 2
>>>> [ 1179.194491] bcache: run_cache_set() invalidating existing data
>>>> [ 1179.212308] bcache: register_cache() registered cache device rxd1
>>>> [ 1203.846220] bcache: register_bdev() registered backing device sda3
>>>> [ 1227.780088] bcache: bch_cached_dev_attach() Caching sda3 as bcache0
>>>> on set a079dda3-27fd-4b1c-a616-d46110bfae6c
>>>> [ 2162.497950] INFO: task kworker/0:0:4 blocked for more than 120
>>>> seconds.
>>>> [ 2162.497961] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
>>>> disables this message.
>>>> [ 2162.497967] kworker/0:0 D  0 4  2
>>>> 0x
>>>> [ 2162.497982] Workqueue: events write_dirty_finish
>>>> [ 2162.497986]  882fa6ac9b30 0046 81511b8c
>>>> 815137c0
>>>> [ 2162.497992]  882fa6a84cb0 882fa6ac9fd8 882fa6ac9fd8
>>>> 882fa6ac9fd8
>>>> [ 2162.497996]  882f78139990 882fa6a84cb0 815117a1
>>>> 882fa6a84cb0
>>>> [ 2162.498001] Call Trace:
>>>> [ 2162.498010]  [] ?
>>>> bch_btree_map_nodes_recurse+0x6c/0x170
>>>> [ 2162.498015]  [] ? bch_btree_insert_node+0x2f0/0x2f0
>>>> [ 2162.498019]  [] ? bch_btree_node_get+0x71/0x280
>>>> [ 2162.498028]  [] schedule+0x29/0x70
>>>> [ 2162.498034]  [] rwsem_down_read_failed+0x9d/0xe5
>>>> [ 2162.498043]  []
>>>> call_rwsem_down_read_failed+0x14/0x30
>>>> [ 2162.498048]  [] ? down_read+0x24/0x2b
>>>> [ 2162.498052]  [] __bch_btree_map_nodes+0xe5/0x1c0
>>>> [ 2162.498057]  [] ? bch_btree_insert_node+0x2f0/0x2f0
>>>> [ 2162.498066]  [] ? update_curr+0x141/0x1f0
>>>> [ 2162.498072]  [] ? dirty_io_destructor+0xe/0x10
>>>> [ 2162.498076]  [] bch_btree_insert+0xb4/0x120
>>>> [ 2162.498082]  [] write_dirty_finish+0x1e3/0x270
>>>> [ 2162.498087]  [] ? pick_next_task_fair+0x6b/0x150
>>>> [ 2162.498093]  [] ? dequeue_task+0x93/0xb0
>>>> [ 2162.498100]  [] process_one_work+0x174/0x490
>>>> [ 2162.498105]  [] worker_thread+0x11b/0x370
>>>> [ 2162.498110]  [] ?
>>>> manage_workers.isra.23+0x2d0/0x2d0
>>>> [ 2162.498115]  [] kthread+0xc0/0xd0
>>>> [ 2162.498120]  [] ? flush_kthread_worker+0xb0/0xb0
>>>> [ 2162.498127]  [] ret_from_fork+0x7c/0xb0
>>>> [ 2162.498131]  [] ? flush_kthread_worker+0xb0/0xb0
>>>> [ 2162.4

Re: bcache: Fix a writeback performance regression

2013-08-28 Thread Stefan Priebe

Am 28.08.2013 20:47, schrieb kernel neophyte:

On Wed, Aug 28, 2013 at 11:38 AM, Stefan Priebe - Profihost AG
 wrote:

I don't had one for a few days. Which kernel so you use?


3.10 kernel with all of kent's stable patches and perf patches..


Which exact 3.10 version? Which patches on top?


and all defaults except for:

echo writeback > /sys/block/bcache0/bcache/cache_mode
echo 0 > /sys/block/bcache0/bcache/sequential_cutoff
echo 0 > 
/sys/fs/bcache/1e26e5ca-5cb9-4c5a-ad7e-3c787524a9dc/congested_read_threshold_us
echo 0 > 
/sys/fs/bcache/1e26e5ca-5cb9-4c5a-ad7e-3c787524a9dc/congested_write_threshold_us

-Neo



Stefan

This mail was sent with my iPhone.

Am 28.08.2013 um 20:12 schrieb kernel neophyte :


On Tue, Aug 27, 2013 at 11:05 PM, Kent Overstreet  wrote:

On Tue, Aug 27, 2013 at 08:37:54PM -0700, kernel neophyte wrote:

Hi Kent,

I am still seeing deadlock:


Seeing blk_throtl_bio (what asshole misspells words in their function
names, so when you go to grep for them you grep for the wrong thing?) in
the backtrace - is that what it takes to hit it?


I don't think so,
I had another deadlock:

[ 1179.193990] bio: create slab  at 2
[ 1179.194491] bcache: run_cache_set() invalidating existing data
[ 1179.212308] bcache: register_cache() registered cache device rxd1
[ 1203.846220] bcache: register_bdev() registered backing device sda3
[ 1227.780088] bcache: bch_cached_dev_attach() Caching sda3 as bcache0
on set a079dda3-27fd-4b1c-a616-d46110bfae6c
[ 2162.497950] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
[ 2162.497961] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.497967] kworker/0:0 D  0 4  2 0x
[ 2162.497982] Workqueue: events write_dirty_finish
[ 2162.497986]  882fa6ac9b30 0046 81511b8c
815137c0
[ 2162.497992]  882fa6a84cb0 882fa6ac9fd8 882fa6ac9fd8
882fa6ac9fd8
[ 2162.497996]  882f78139990 882fa6a84cb0 815117a1
882fa6a84cb0
[ 2162.498001] Call Trace:
[ 2162.498010]  [] ? bch_btree_map_nodes_recurse+0x6c/0x170
[ 2162.498015]  [] ? bch_btree_insert_node+0x2f0/0x2f0
[ 2162.498019]  [] ? bch_btree_node_get+0x71/0x280
[ 2162.498028]  [] schedule+0x29/0x70
[ 2162.498034]  [] rwsem_down_read_failed+0x9d/0xe5
[ 2162.498043]  [] call_rwsem_down_read_failed+0x14/0x30
[ 2162.498048]  [] ? down_read+0x24/0x2b
[ 2162.498052]  [] __bch_btree_map_nodes+0xe5/0x1c0
[ 2162.498057]  [] ? bch_btree_insert_node+0x2f0/0x2f0
[ 2162.498066]  [] ? update_curr+0x141/0x1f0
[ 2162.498072]  [] ? dirty_io_destructor+0xe/0x10
[ 2162.498076]  [] bch_btree_insert+0xb4/0x120
[ 2162.498082]  [] write_dirty_finish+0x1e3/0x270
[ 2162.498087]  [] ? pick_next_task_fair+0x6b/0x150
[ 2162.498093]  [] ? dequeue_task+0x93/0xb0
[ 2162.498100]  [] process_one_work+0x174/0x490
[ 2162.498105]  [] worker_thread+0x11b/0x370
[ 2162.498110]  [] ? manage_workers.isra.23+0x2d0/0x2d0
[ 2162.498115]  [] kthread+0xc0/0xd0
[ 2162.498120]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498127]  [] ret_from_fork+0x7c/0xb0
[ 2162.498131]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498145] INFO: task bcache_allocato:1766 blocked for more than
120 seconds.
[ 2162.498149] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.498154] bcache_allocato D 0001 0  1766  2 0x
[ 2162.498158]  8822630f3d88 0046 8822630f3da8
810808ad
[ 2162.498163]  882f891de640 8822630f3fd8 8822630f3fd8
8822630f3fd8
[ 2162.498167]  882f81a91990 882f891de640 882f88b33320
8801fad20d98
[ 2162.498172] Call Trace:
[ 2162.498177]  [] ? dequeue_task_fair+0x2cd/0x530
[ 2162.498182]  [] schedule+0x29/0x70
[ 2162.498187]  [] schedule_preempt_disabled+0xe/0x10
[ 2162.498192]  [] __mutex_lock_slowpath+0x112/0x1b0
[ 2162.498197]  [] mutex_lock+0x2a/0x50
[ 2162.498202]  [] bch_allocator_thread+0x10f/0xe20
[ 2162.498207]  [] ? bch_bucket_add_unused+0xe0/0xe0
[ 2162.498211]  [] kthread+0xc0/0xd0
[ 2162.498215]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498220]  [] ret_from_fork+0x7c/0xb0
[ 2162.498224]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498228] INFO: task bcache_writebac:1771 blocked for more than
120 seconds.
[ 2162.498233] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.498237] bcache_writebac D  0  1771  2 0x
[ 2162.498241]  88197d2adc58 0046 88197d2add48
811269e2
[ 2162.498245]  882f891d8000 88197d2adfd8 88197d2adfd8
88197d2adfd8
[ 2162.498250]  882f81a91990 882f891d8000 88197d2adc68
882f89550ba0
[ 2162.498254] Call Trace:
[ 2162.498264]  [] ? __alloc_pages_nodemask+0x152/0x9b0
[ 2162.498269]  [] schedule+0x29/0x70
[ 2162.498274]  [] schedule_timeout+0x1e5/0x250
[ 2162.498280]  [] ? try_to_wake_up+0x24f/0x2b0
[ 2162.498285]  [] __down_common+0x8e/0xe3
[ 2162.498293]  [] ? alloc_pages_current+0x90/

Re: bcache: Fix a writeback performance regression

2013-08-28 Thread kernel neophyte
On Wed, Aug 28, 2013 at 11:38 AM, Stefan Priebe - Profihost AG
 wrote:
> I don't had one for a few days. Which kernel so you use?

3.10 kernel with all of kent's stable patches and perf patches..

and all defaults except for:

echo writeback > /sys/block/bcache0/bcache/cache_mode
echo 0 > /sys/block/bcache0/bcache/sequential_cutoff
echo 0 > 
/sys/fs/bcache/1e26e5ca-5cb9-4c5a-ad7e-3c787524a9dc/congested_read_threshold_us
echo 0 > 
/sys/fs/bcache/1e26e5ca-5cb9-4c5a-ad7e-3c787524a9dc/congested_write_threshold_us

-Neo

>
> Stefan
>
> This mail was sent with my iPhone.
>
> Am 28.08.2013 um 20:12 schrieb kernel neophyte :
>
>> On Tue, Aug 27, 2013 at 11:05 PM, Kent Overstreet  wrote:
>>> On Tue, Aug 27, 2013 at 08:37:54PM -0700, kernel neophyte wrote:
 Hi Kent,

 I am still seeing deadlock:
>>>
>>> Seeing blk_throtl_bio (what asshole misspells words in their function
>>> names, so when you go to grep for them you grep for the wrong thing?) in
>>> the backtrace - is that what it takes to hit it?
>>
>> I don't think so,
>> I had another deadlock:
>>
>> [ 1179.193990] bio: create slab  at 2
>> [ 1179.194491] bcache: run_cache_set() invalidating existing data
>> [ 1179.212308] bcache: register_cache() registered cache device rxd1
>> [ 1203.846220] bcache: register_bdev() registered backing device sda3
>> [ 1227.780088] bcache: bch_cached_dev_attach() Caching sda3 as bcache0
>> on set a079dda3-27fd-4b1c-a616-d46110bfae6c
>> [ 2162.497950] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
>> [ 2162.497961] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
>> disables this message.
>> [ 2162.497967] kworker/0:0 D  0 4  2 
>> 0x
>> [ 2162.497982] Workqueue: events write_dirty_finish
>> [ 2162.497986]  882fa6ac9b30 0046 81511b8c
>> 815137c0
>> [ 2162.497992]  882fa6a84cb0 882fa6ac9fd8 882fa6ac9fd8
>> 882fa6ac9fd8
>> [ 2162.497996]  882f78139990 882fa6a84cb0 815117a1
>> 882fa6a84cb0
>> [ 2162.498001] Call Trace:
>> [ 2162.498010]  [] ? bch_btree_map_nodes_recurse+0x6c/0x170
>> [ 2162.498015]  [] ? bch_btree_insert_node+0x2f0/0x2f0
>> [ 2162.498019]  [] ? bch_btree_node_get+0x71/0x280
>> [ 2162.498028]  [] schedule+0x29/0x70
>> [ 2162.498034]  [] rwsem_down_read_failed+0x9d/0xe5
>> [ 2162.498043]  [] call_rwsem_down_read_failed+0x14/0x30
>> [ 2162.498048]  [] ? down_read+0x24/0x2b
>> [ 2162.498052]  [] __bch_btree_map_nodes+0xe5/0x1c0
>> [ 2162.498057]  [] ? bch_btree_insert_node+0x2f0/0x2f0
>> [ 2162.498066]  [] ? update_curr+0x141/0x1f0
>> [ 2162.498072]  [] ? dirty_io_destructor+0xe/0x10
>> [ 2162.498076]  [] bch_btree_insert+0xb4/0x120
>> [ 2162.498082]  [] write_dirty_finish+0x1e3/0x270
>> [ 2162.498087]  [] ? pick_next_task_fair+0x6b/0x150
>> [ 2162.498093]  [] ? dequeue_task+0x93/0xb0
>> [ 2162.498100]  [] process_one_work+0x174/0x490
>> [ 2162.498105]  [] worker_thread+0x11b/0x370
>> [ 2162.498110]  [] ? manage_workers.isra.23+0x2d0/0x2d0
>> [ 2162.498115]  [] kthread+0xc0/0xd0
>> [ 2162.498120]  [] ? flush_kthread_worker+0xb0/0xb0
>> [ 2162.498127]  [] ret_from_fork+0x7c/0xb0
>> [ 2162.498131]  [] ? flush_kthread_worker+0xb0/0xb0
>> [ 2162.498145] INFO: task bcache_allocato:1766 blocked for more than
>> 120 seconds.
>> [ 2162.498149] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
>> disables this message.
>> [ 2162.498154] bcache_allocato D 0001 0  1766  2 
>> 0x
>> [ 2162.498158]  8822630f3d88 0046 8822630f3da8
>> 810808ad
>> [ 2162.498163]  882f891de640 8822630f3fd8 8822630f3fd8
>> 8822630f3fd8
>> [ 2162.498167]  882f81a91990 882f891de640 882f88b33320
>> 8801fad20d98
>> [ 2162.498172] Call Trace:
>> [ 2162.498177]  [] ? dequeue_task_fair+0x2cd/0x530
>> [ 2162.498182]  [] schedule+0x29/0x70
>> [ 2162.498187]  [] schedule_preempt_disabled+0xe/0x10
>> [ 2162.498192]  [] __mutex_lock_slowpath+0x112/0x1b0
>> [ 2162.498197]  [] mutex_lock+0x2a/0x50
>> [ 2162.498202]  [] bch_allocator_thread+0x10f/0xe20
>> [ 2162.498207]  [] ? bch_bucket_add_unused+0xe0/0xe0
>> [ 2162.498211]  [] kthread+0xc0/0xd0
>> [ 2162.498215]  [] ? flush_kthread_worker+0xb0/0xb0
>> [ 2162.498220]  [] ret_from_fork+0x7c/0xb0
>> [ 2162.498224]  [] ? flush_kthread_worker+0xb0/0xb0
>> [ 2162.498228] INFO: task bcache_writebac:1771 blocked for more than
>> 120 seconds.
>> [ 2162.498233] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
>> disables this message.
>> [ 2162.498237] bcache_writebac D  0  1771  2 
>> 0x
>> [ 2162.498241]  88197d2adc58 0046 88197d2add48
>> 811269e2
>> [ 2162.498245]  882f891d8000 88197d2adfd8 88197d2adfd8
>> 88197d2adfd8
>> [ 2162.498250]  882f81a91990 882f891d8000 88197d2adc68
>> 882f89550ba0
>> [ 2162.498254] Call Trace:
>> [ 2162.498264]  [] ? __alloc_pages_nodemask+0x152/0x9b0
>> [ 2162.498269]  [] schedule+0

Re: bcache: Fix a writeback performance regression

2013-08-28 Thread Stefan Priebe - Profihost AG
I don't had one for a few days. Which kernel so you use?

Stefan

This mail was sent with my iPhone.

Am 28.08.2013 um 20:12 schrieb kernel neophyte :

> On Tue, Aug 27, 2013 at 11:05 PM, Kent Overstreet  wrote:
>> On Tue, Aug 27, 2013 at 08:37:54PM -0700, kernel neophyte wrote:
>>> Hi Kent,
>>> 
>>> I am still seeing deadlock:
>> 
>> Seeing blk_throtl_bio (what asshole misspells words in their function
>> names, so when you go to grep for them you grep for the wrong thing?) in
>> the backtrace - is that what it takes to hit it?
> 
> I don't think so,
> I had another deadlock:
> 
> [ 1179.193990] bio: create slab  at 2
> [ 1179.194491] bcache: run_cache_set() invalidating existing data
> [ 1179.212308] bcache: register_cache() registered cache device rxd1
> [ 1203.846220] bcache: register_bdev() registered backing device sda3
> [ 1227.780088] bcache: bch_cached_dev_attach() Caching sda3 as bcache0
> on set a079dda3-27fd-4b1c-a616-d46110bfae6c
> [ 2162.497950] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
> [ 2162.497961] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [ 2162.497967] kworker/0:0 D  0 4  2 
> 0x
> [ 2162.497982] Workqueue: events write_dirty_finish
> [ 2162.497986]  882fa6ac9b30 0046 81511b8c
> 815137c0
> [ 2162.497992]  882fa6a84cb0 882fa6ac9fd8 882fa6ac9fd8
> 882fa6ac9fd8
> [ 2162.497996]  882f78139990 882fa6a84cb0 815117a1
> 882fa6a84cb0
> [ 2162.498001] Call Trace:
> [ 2162.498010]  [] ? bch_btree_map_nodes_recurse+0x6c/0x170
> [ 2162.498015]  [] ? bch_btree_insert_node+0x2f0/0x2f0
> [ 2162.498019]  [] ? bch_btree_node_get+0x71/0x280
> [ 2162.498028]  [] schedule+0x29/0x70
> [ 2162.498034]  [] rwsem_down_read_failed+0x9d/0xe5
> [ 2162.498043]  [] call_rwsem_down_read_failed+0x14/0x30
> [ 2162.498048]  [] ? down_read+0x24/0x2b
> [ 2162.498052]  [] __bch_btree_map_nodes+0xe5/0x1c0
> [ 2162.498057]  [] ? bch_btree_insert_node+0x2f0/0x2f0
> [ 2162.498066]  [] ? update_curr+0x141/0x1f0
> [ 2162.498072]  [] ? dirty_io_destructor+0xe/0x10
> [ 2162.498076]  [] bch_btree_insert+0xb4/0x120
> [ 2162.498082]  [] write_dirty_finish+0x1e3/0x270
> [ 2162.498087]  [] ? pick_next_task_fair+0x6b/0x150
> [ 2162.498093]  [] ? dequeue_task+0x93/0xb0
> [ 2162.498100]  [] process_one_work+0x174/0x490
> [ 2162.498105]  [] worker_thread+0x11b/0x370
> [ 2162.498110]  [] ? manage_workers.isra.23+0x2d0/0x2d0
> [ 2162.498115]  [] kthread+0xc0/0xd0
> [ 2162.498120]  [] ? flush_kthread_worker+0xb0/0xb0
> [ 2162.498127]  [] ret_from_fork+0x7c/0xb0
> [ 2162.498131]  [] ? flush_kthread_worker+0xb0/0xb0
> [ 2162.498145] INFO: task bcache_allocato:1766 blocked for more than
> 120 seconds.
> [ 2162.498149] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [ 2162.498154] bcache_allocato D 0001 0  1766  2 
> 0x
> [ 2162.498158]  8822630f3d88 0046 8822630f3da8
> 810808ad
> [ 2162.498163]  882f891de640 8822630f3fd8 8822630f3fd8
> 8822630f3fd8
> [ 2162.498167]  882f81a91990 882f891de640 882f88b33320
> 8801fad20d98
> [ 2162.498172] Call Trace:
> [ 2162.498177]  [] ? dequeue_task_fair+0x2cd/0x530
> [ 2162.498182]  [] schedule+0x29/0x70
> [ 2162.498187]  [] schedule_preempt_disabled+0xe/0x10
> [ 2162.498192]  [] __mutex_lock_slowpath+0x112/0x1b0
> [ 2162.498197]  [] mutex_lock+0x2a/0x50
> [ 2162.498202]  [] bch_allocator_thread+0x10f/0xe20
> [ 2162.498207]  [] ? bch_bucket_add_unused+0xe0/0xe0
> [ 2162.498211]  [] kthread+0xc0/0xd0
> [ 2162.498215]  [] ? flush_kthread_worker+0xb0/0xb0
> [ 2162.498220]  [] ret_from_fork+0x7c/0xb0
> [ 2162.498224]  [] ? flush_kthread_worker+0xb0/0xb0
> [ 2162.498228] INFO: task bcache_writebac:1771 blocked for more than
> 120 seconds.
> [ 2162.498233] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [ 2162.498237] bcache_writebac D  0  1771  2 
> 0x
> [ 2162.498241]  88197d2adc58 0046 88197d2add48
> 811269e2
> [ 2162.498245]  882f891d8000 88197d2adfd8 88197d2adfd8
> 88197d2adfd8
> [ 2162.498250]  882f81a91990 882f891d8000 88197d2adc68
> 882f89550ba0
> [ 2162.498254] Call Trace:
> [ 2162.498264]  [] ? __alloc_pages_nodemask+0x152/0x9b0
> [ 2162.498269]  [] schedule+0x29/0x70
> [ 2162.498274]  [] schedule_timeout+0x1e5/0x250
> [ 2162.498280]  [] ? try_to_wake_up+0x24f/0x2b0
> [ 2162.498285]  [] __down_common+0x8e/0xe3
> [ 2162.498293]  [] ? alloc_pages_current+0x90/0x170
> [ 2162.498298]  [] __down+0x1d/0x1f
> [ 2162.498304]  [] down+0x41/0x50
> [ 2162.498310]  [] bch_writeback_thread+0x35d/0x7f0
> [ 2162.498315]  [] ? idle_balance+0xeb/0x150
> [ 2162.498321]  [] ? write_dirty+0xc0/0xc0
> [ 2162.498325]  [] kthread+0xc0/0xd0
> [ 2162.498329]  [] ? flush_kthread_worker+0xb0/0xb0
> [ 2162.498334]  [] ret_from_fork+0x7c/0xb

Re: bcache: Fix a writeback performance regression

2013-08-28 Thread kernel neophyte
On Tue, Aug 27, 2013 at 11:05 PM, Kent Overstreet  wrote:
> On Tue, Aug 27, 2013 at 08:37:54PM -0700, kernel neophyte wrote:
>> Hi Kent,
>>
>> I am still seeing deadlock:
>
> Seeing blk_throtl_bio (what asshole misspells words in their function
> names, so when you go to grep for them you grep for the wrong thing?) in
> the backtrace - is that what it takes to hit it?
>

I don't think so,
I had another deadlock:

[ 1179.193990] bio: create slab  at 2
[ 1179.194491] bcache: run_cache_set() invalidating existing data
[ 1179.212308] bcache: register_cache() registered cache device rxd1
[ 1203.846220] bcache: register_bdev() registered backing device sda3
[ 1227.780088] bcache: bch_cached_dev_attach() Caching sda3 as bcache0
on set a079dda3-27fd-4b1c-a616-d46110bfae6c
[ 2162.497950] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
[ 2162.497961] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.497967] kworker/0:0 D  0 4  2 0x
[ 2162.497982] Workqueue: events write_dirty_finish
[ 2162.497986]  882fa6ac9b30 0046 81511b8c
815137c0
[ 2162.497992]  882fa6a84cb0 882fa6ac9fd8 882fa6ac9fd8
882fa6ac9fd8
[ 2162.497996]  882f78139990 882fa6a84cb0 815117a1
882fa6a84cb0
[ 2162.498001] Call Trace:
[ 2162.498010]  [] ? bch_btree_map_nodes_recurse+0x6c/0x170
[ 2162.498015]  [] ? bch_btree_insert_node+0x2f0/0x2f0
[ 2162.498019]  [] ? bch_btree_node_get+0x71/0x280
[ 2162.498028]  [] schedule+0x29/0x70
[ 2162.498034]  [] rwsem_down_read_failed+0x9d/0xe5
[ 2162.498043]  [] call_rwsem_down_read_failed+0x14/0x30
[ 2162.498048]  [] ? down_read+0x24/0x2b
[ 2162.498052]  [] __bch_btree_map_nodes+0xe5/0x1c0
[ 2162.498057]  [] ? bch_btree_insert_node+0x2f0/0x2f0
[ 2162.498066]  [] ? update_curr+0x141/0x1f0
[ 2162.498072]  [] ? dirty_io_destructor+0xe/0x10
[ 2162.498076]  [] bch_btree_insert+0xb4/0x120
[ 2162.498082]  [] write_dirty_finish+0x1e3/0x270
[ 2162.498087]  [] ? pick_next_task_fair+0x6b/0x150
[ 2162.498093]  [] ? dequeue_task+0x93/0xb0
[ 2162.498100]  [] process_one_work+0x174/0x490
[ 2162.498105]  [] worker_thread+0x11b/0x370
[ 2162.498110]  [] ? manage_workers.isra.23+0x2d0/0x2d0
[ 2162.498115]  [] kthread+0xc0/0xd0
[ 2162.498120]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498127]  [] ret_from_fork+0x7c/0xb0
[ 2162.498131]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498145] INFO: task bcache_allocato:1766 blocked for more than
120 seconds.
[ 2162.498149] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.498154] bcache_allocato D 0001 0  1766  2 0x
[ 2162.498158]  8822630f3d88 0046 8822630f3da8
810808ad
[ 2162.498163]  882f891de640 8822630f3fd8 8822630f3fd8
8822630f3fd8
[ 2162.498167]  882f81a91990 882f891de640 882f88b33320
8801fad20d98
[ 2162.498172] Call Trace:
[ 2162.498177]  [] ? dequeue_task_fair+0x2cd/0x530
[ 2162.498182]  [] schedule+0x29/0x70
[ 2162.498187]  [] schedule_preempt_disabled+0xe/0x10
[ 2162.498192]  [] __mutex_lock_slowpath+0x112/0x1b0
[ 2162.498197]  [] mutex_lock+0x2a/0x50
[ 2162.498202]  [] bch_allocator_thread+0x10f/0xe20
[ 2162.498207]  [] ? bch_bucket_add_unused+0xe0/0xe0
[ 2162.498211]  [] kthread+0xc0/0xd0
[ 2162.498215]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498220]  [] ret_from_fork+0x7c/0xb0
[ 2162.498224]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498228] INFO: task bcache_writebac:1771 blocked for more than
120 seconds.
[ 2162.498233] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.498237] bcache_writebac D  0  1771  2 0x
[ 2162.498241]  88197d2adc58 0046 88197d2add48
811269e2
[ 2162.498245]  882f891d8000 88197d2adfd8 88197d2adfd8
88197d2adfd8
[ 2162.498250]  882f81a91990 882f891d8000 88197d2adc68
882f89550ba0
[ 2162.498254] Call Trace:
[ 2162.498264]  [] ? __alloc_pages_nodemask+0x152/0x9b0
[ 2162.498269]  [] schedule+0x29/0x70
[ 2162.498274]  [] schedule_timeout+0x1e5/0x250
[ 2162.498280]  [] ? try_to_wake_up+0x24f/0x2b0
[ 2162.498285]  [] __down_common+0x8e/0xe3
[ 2162.498293]  [] ? alloc_pages_current+0x90/0x170
[ 2162.498298]  [] __down+0x1d/0x1f
[ 2162.498304]  [] down+0x41/0x50
[ 2162.498310]  [] bch_writeback_thread+0x35d/0x7f0
[ 2162.498315]  [] ? idle_balance+0xeb/0x150
[ 2162.498321]  [] ? write_dirty+0xc0/0xc0
[ 2162.498325]  [] kthread+0xc0/0xd0
[ 2162.498329]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498334]  [] ret_from_fork+0x7c/0xb0
[ 2162.498338]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2162.498342] INFO: task kworker/0:2:1789 blocked for more than 120 seconds.
[ 2162.498346] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2162.498351] kworker/0:2 D 81813a60 0  1789  2 0x
[ 2162.498357] Workqueue: events write_dirty_finish
[ 21

Re: bcache: Fix a writeback performance regression

2013-08-27 Thread Kent Overstreet
On Tue, Aug 27, 2013 at 08:37:54PM -0700, kernel neophyte wrote:
> Hi Kent,
> 
> I am still seeing deadlock:

Seeing blk_throtl_bio (what asshole misspells words in their function
names, so when you go to grep for them you grep for the wrong thing?) in
the backtrace - is that what it takes to hit it?

> 
> [  930.169234] bcache: bch_cached_dev_attach() Caching sda3 as bcache0
> on set 06496904-10ca-489a-ae75-68c6a07d3db1
> [ 2522.956188] INFO: task bcache_writebac:2058 blocked for more than
> 120 seconds.
> [ 2522.956199] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [ 2522.956205] bcache_writebac D 81813a60 0  2058  2 
> 0x
> [ 2522.956212]  881c39c1fcd8 0046 0001
> 0001
> [ 2522.956218]  882f8b56b320 881c39c1ffd8 881c39c1ffd8
> 881c39c1ffd8
> [ 2522.956222]  882fa6aeb320 882f8b56b320 882f8b56b320
> 882f8b56b320
> [ 2522.956227] Call Trace:
> [ 2522.956241]  [] schedule+0x29/0x70
> [ 2522.956247]  [] rwsem_down_write_failed+0xf5/0x1a0
> [ 2522.956255]  [] call_rwsem_down_write_failed+0x13/0x20
> [ 2522.956261]  [] ? down_write+0x31/0x40
> [ 2522.956269]  [] bch_writeback_thread+0x62/0x7f0
> [ 2522.956278]  [] ? idle_balance+0xeb/0x150
> [ 2522.956284]  [] ? write_dirty+0xc0/0xc0
> [ 2522.956290]  [] kthread+0xc0/0xd0
> [ 2522.956294]  [] ? flush_kthread_worker+0xb0/0xb0
> [ 2522.956302]  [] ret_from_fork+0x7c/0xb0
> [ 2522.956306]  [] ? flush_kthread_worker+0xb0/0xb0
> [ 2522.956313] INFO: task kworker/u64:1:2433 blocked for more than 120 
> seconds.
> [ 2522.956317] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [ 2522.956322] kworker/u64:1   D 81813a60 0  2433  2 
> 0x
> [ 2522.956332] Workqueue: writeback bdi_writeback_workfn (flush-252:0)
> [ 2522.956336]  882f899d1560 0046 81120635
> 0100
> [ 2522.956340]  88219b488000 882f899d1fd8 882f899d1fd8
> 882f899d1fd8
> [ 2522.956345]  882fa6aeb320 88219b488000 8131ad55
> 88219b488000
> [ 2522.956349] Call Trace:
> [ 2522.956359]  [] ? mempool_alloc_slab+0x15/0x20
> [ 2522.956367]  [] ? blk_throtl_bio+0x3b5/0x540
> [ 2522.956372]  [] schedule+0x29/0x70
> [ 2522.956378]  [] rwsem_down_read_failed+0x9d/0xe5
> [ 2522.956383]  [] call_rwsem_down_read_failed+0x14/0x30
> [ 2522.956388]  [] ? down_read+0x24/0x2b
> [ 2522.956394]  [] cached_dev_make_request+0x752/0xf00
> [ 2522.956400]  [] ? generic_make_request_checks+0x1e8/0x3a0
> [ 2522.956405]  [] generic_make_request+0xca/0x100
> [ 2522.956409]  [] submit_bio+0x79/0x160
> [ 2522.956417]  [] ? bio_alloc_bioset+0xa0/0x1d0
> [ 2522.956421]  [] _submit_bh+0x13f/0x200
> [ 2522.956425]  [] submit_bh+0x10/0x20
> [ 2522.956430]  [] __block_write_full_page+0x1d8/0x360
> [ 2522.956439]  [] ? scsi_request_fn+0xbb/0x530
> [ 2522.956444]  [] ? end_buffer_async_read+0x130/0x130
> [ 2522.956449]  [] ? I_BDEV+0x10/0x10
> [ 2522.956454]  [] ? I_BDEV+0x10/0x10
> [ 2522.956459]  [] block_write_full_page_endio+0xca/0x100
> [ 2522.956464]  [] block_write_full_page+0x15/0x20
> [ 2522.956469]  [] blkdev_writepage+0x18/0x20
> [ 2522.956473]  [] __writepage+0x17/0x40
> [ 2522.956477]  [] write_cache_pages+0x20e/0x460
> [ 2522.956481]  [] ? set_page_dirty_lock+0x60/0x60
> [ 2522.956486]  [] ? __set_page_dirty+0x71/0xc0
> [ 2522.956490]  [] generic_writepages+0x4a/0x70
> [ 2522.956495]  [] do_writepages+0x20/0x40
> [ 2522.956501]  [] __writeback_single_inode+0x45/0x280
> [ 2522.956507]  [] ? __alloc_pages_nodemask+0x152/0x9b0
> [ 2522.956513]  [] writeback_sb_inodes+0x19d/0x3c0
> [ 2522.956518]  [] __writeback_inodes_wb+0x9e/0xd0
> [ 2522.956523]  [] wb_writeback+0x27b/0x320
> [ 2522.956529]  [] ? get_nr_dirty_inodes+0x58/0x80
> [ 2522.956534]  [] wb_check_old_data_flush+0x9f/0xb0
> [ 2522.956539]  [] wb_do_writeback+0x151/0x1d0
> [ 2522.956547]  [] ? set_worker_desc+0x6f/0x80
> [ 2522.956552]  [] bdi_writeback_workfn+0x7a/0x200
> [ 2522.956557]  [] process_one_work+0x174/0x490
> [ 2522.956562]  [] worker_thread+0x11b/0x370
> [ 2522.956567]  [] ? manage_workers.isra.23+0x2d0/0x2d0
> [ 2522.956571]  [] kthread+0xc0/0xd0
> [ 2522.956575]  [] ? flush_kthread_worker+0xb0/0xb0
> [ 2522.956581]  [] ret_from_fork+0x7c/0xb0
> [ 2522.956585]  [] ? flush_kthread_worker+0xb0/0xb0
> [ 2522.956588] INFO: task iozone:2435 blocked for more than 120 seconds.
> [ 2522.956593] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> disables this message.
> [ 2522.956597] iozone  D 0001 0  2435   1545 
> 0x
> [ 2522.956602]  882a75671978 0082 882a75671938
> 81301d7a
> [ 2522.956606]  882f88dc1990 882a75671fd8 882a75671fd8
> 882a75671fd8
> [ 2522.956611]  882f8ec91990 882f88dc1990 882a75671978
> 882fbf2539f8
> [ 2522.956615] Call Trace:
> [ 2522.956619]  [] ? generic_make_request+0xca/0x100
> [ 2522.956625]  [] schedule+

Re: bcache: Fix a writeback performance regression

2013-08-27 Thread kernel neophyte
Hi Kent,

I am still seeing deadlock:

[  930.169234] bcache: bch_cached_dev_attach() Caching sda3 as bcache0
on set 06496904-10ca-489a-ae75-68c6a07d3db1
[ 2522.956188] INFO: task bcache_writebac:2058 blocked for more than
120 seconds.
[ 2522.956199] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2522.956205] bcache_writebac D 81813a60 0  2058  2 0x
[ 2522.956212]  881c39c1fcd8 0046 0001
0001
[ 2522.956218]  882f8b56b320 881c39c1ffd8 881c39c1ffd8
881c39c1ffd8
[ 2522.956222]  882fa6aeb320 882f8b56b320 882f8b56b320
882f8b56b320
[ 2522.956227] Call Trace:
[ 2522.956241]  [] schedule+0x29/0x70
[ 2522.956247]  [] rwsem_down_write_failed+0xf5/0x1a0
[ 2522.956255]  [] call_rwsem_down_write_failed+0x13/0x20
[ 2522.956261]  [] ? down_write+0x31/0x40
[ 2522.956269]  [] bch_writeback_thread+0x62/0x7f0
[ 2522.956278]  [] ? idle_balance+0xeb/0x150
[ 2522.956284]  [] ? write_dirty+0xc0/0xc0
[ 2522.956290]  [] kthread+0xc0/0xd0
[ 2522.956294]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2522.956302]  [] ret_from_fork+0x7c/0xb0
[ 2522.956306]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2522.956313] INFO: task kworker/u64:1:2433 blocked for more than 120 seconds.
[ 2522.956317] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2522.956322] kworker/u64:1   D 81813a60 0  2433  2 0x
[ 2522.956332] Workqueue: writeback bdi_writeback_workfn (flush-252:0)
[ 2522.956336]  882f899d1560 0046 81120635
0100
[ 2522.956340]  88219b488000 882f899d1fd8 882f899d1fd8
882f899d1fd8
[ 2522.956345]  882fa6aeb320 88219b488000 8131ad55
88219b488000
[ 2522.956349] Call Trace:
[ 2522.956359]  [] ? mempool_alloc_slab+0x15/0x20
[ 2522.956367]  [] ? blk_throtl_bio+0x3b5/0x540
[ 2522.956372]  [] schedule+0x29/0x70
[ 2522.956378]  [] rwsem_down_read_failed+0x9d/0xe5
[ 2522.956383]  [] call_rwsem_down_read_failed+0x14/0x30
[ 2522.956388]  [] ? down_read+0x24/0x2b
[ 2522.956394]  [] cached_dev_make_request+0x752/0xf00
[ 2522.956400]  [] ? generic_make_request_checks+0x1e8/0x3a0
[ 2522.956405]  [] generic_make_request+0xca/0x100
[ 2522.956409]  [] submit_bio+0x79/0x160
[ 2522.956417]  [] ? bio_alloc_bioset+0xa0/0x1d0
[ 2522.956421]  [] _submit_bh+0x13f/0x200
[ 2522.956425]  [] submit_bh+0x10/0x20
[ 2522.956430]  [] __block_write_full_page+0x1d8/0x360
[ 2522.956439]  [] ? scsi_request_fn+0xbb/0x530
[ 2522.956444]  [] ? end_buffer_async_read+0x130/0x130
[ 2522.956449]  [] ? I_BDEV+0x10/0x10
[ 2522.956454]  [] ? I_BDEV+0x10/0x10
[ 2522.956459]  [] block_write_full_page_endio+0xca/0x100
[ 2522.956464]  [] block_write_full_page+0x15/0x20
[ 2522.956469]  [] blkdev_writepage+0x18/0x20
[ 2522.956473]  [] __writepage+0x17/0x40
[ 2522.956477]  [] write_cache_pages+0x20e/0x460
[ 2522.956481]  [] ? set_page_dirty_lock+0x60/0x60
[ 2522.956486]  [] ? __set_page_dirty+0x71/0xc0
[ 2522.956490]  [] generic_writepages+0x4a/0x70
[ 2522.956495]  [] do_writepages+0x20/0x40
[ 2522.956501]  [] __writeback_single_inode+0x45/0x280
[ 2522.956507]  [] ? __alloc_pages_nodemask+0x152/0x9b0
[ 2522.956513]  [] writeback_sb_inodes+0x19d/0x3c0
[ 2522.956518]  [] __writeback_inodes_wb+0x9e/0xd0
[ 2522.956523]  [] wb_writeback+0x27b/0x320
[ 2522.956529]  [] ? get_nr_dirty_inodes+0x58/0x80
[ 2522.956534]  [] wb_check_old_data_flush+0x9f/0xb0
[ 2522.956539]  [] wb_do_writeback+0x151/0x1d0
[ 2522.956547]  [] ? set_worker_desc+0x6f/0x80
[ 2522.956552]  [] bdi_writeback_workfn+0x7a/0x200
[ 2522.956557]  [] process_one_work+0x174/0x490
[ 2522.956562]  [] worker_thread+0x11b/0x370
[ 2522.956567]  [] ? manage_workers.isra.23+0x2d0/0x2d0
[ 2522.956571]  [] kthread+0xc0/0xd0
[ 2522.956575]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2522.956581]  [] ret_from_fork+0x7c/0xb0
[ 2522.956585]  [] ? flush_kthread_worker+0xb0/0xb0
[ 2522.956588] INFO: task iozone:2435 blocked for more than 120 seconds.
[ 2522.956593] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 2522.956597] iozone  D 0001 0  2435   1545 0x
[ 2522.956602]  882a75671978 0082 882a75671938
81301d7a
[ 2522.956606]  882f88dc1990 882a75671fd8 882a75671fd8
882a75671fd8
[ 2522.956611]  882f8ec91990 882f88dc1990 882a75671978
882fbf2539f8
[ 2522.956615] Call Trace:
[ 2522.956619]  [] ? generic_make_request+0xca/0x100
[ 2522.956625]  [] schedule+0x29/0x70
[ 2522.956630]  [] io_schedule+0x8f/0xd0
[ 2522.956636]  [] do_blockdev_direct_IO+0x1a7c/0x1fb0
[ 2522.956645]  [] ? ext2_get_blocks+0xa60/0xa60 [ext2]
[ 2522.956652]  [] __blockdev_direct_IO+0x55/0x60
[ 2522.956658]  [] ? ext2_get_blocks+0xa60/0xa60 [ext2]
[ 2522.956662]  [] ? update_curr+0x141/0x1f0
[ 2522.956669]  [] ext2_direct_IO+0x79/0xe0 [ext2]
[ 2522.956674]  [] ? ext2_get_blocks+0xa60/0xa60 [ext2]
[ 2522.956680]  [] ? current_fs_time+0x16/0x60

Re: bcache: Fix a writeback performance regression

2013-08-26 Thread Stefan Priebe

Hi Kent,

a new one under 3.10:

2013-08-26 21:05:30 INFO: task ceph-osd:8939 blocked for more than 
120 seconds.
2013-08-26 21:05:30 "echo 0 > 
/proc/sys/kernel/hung_task_timeout_secs" disables this message.

2013-08-26 21:05:30 ceph-osd D 8160f760 0 8939 1 0x
2013-08-26 21:05:30 880bd9a61820 0086 
880bd9a61fd8 00012c80
2013-08-26 21:05:30 880bd9a60010 00012c80 
00012c80 00012c80
2013-08-26 21:05:30 880bd9a61fd8 00012c80 
880bd7abb1c0 880c48dee380

2013-08-26 21:05:29 [] xfs_vm_readpages+0x18/0x20
2013-08-26 21:05:29 [] read_pages+0x43/0x100
2013-08-26 21:05:29 [] ? __page_cache_alloc+0x9f/0xc0
2013-08-26 21:05:29 [] 
__do_page_cache_readahead+0x14b/0x160

2013-08-26 21:05:29 [] ra_submit+0x1c/0x20
2013-08-26 21:05:29 [] ondemand_readahead+0x115/0x240
2013-08-26 21:05:29 [] 
page_cache_sync_readahead+0x2e/0x40

2013-08-26 21:05:29 [] T.1003+0x33e/0x430
2013-08-26 21:05:29 [] 
generic_file_aio_read+0xe0/0x220

2013-08-26 21:05:29 [] xfs_file_aio_read+0x15a/0x2a0
2013-08-26 21:05:29 [] do_sync_read+0x7a/0xb0
2013-08-26 21:05:29 [] vfs_read+0xb1/0x130
2013-08-26 21:05:29 [] SyS_pread64+0x97/0xa0
2013-08-26 21:05:29 [] system_call_fastpath+0x16/0x1b
2013-08-26 21:05:29 INFO: task ceph-osd:8896 blocked for more than 
120 seconds.
2013-08-26 21:05:29 "echo 0 > 
/proc/sys/kernel/hung_task_timeout_secs" disables this message.

2013-08-26 21:05:29 ceph-osd D 8160f760 0 8896 1 0x
2013-08-26 21:05:29 880bd9a9dc28 0086 
880bd9a9dfd8 00012c80
2013-08-26 21:05:29 880bd9a9c010 00012c80 
00012c80 00012c80
2013-08-26 21:05:29 880bd9a9dfd8 00012c80 
880bd79298e0 880c48dd6380

2013-08-26 21:05:29 Call Trace:
2013-08-26 21:05:29 [] ? __lock_page+0x70/0x70
2013-08-26 21:05:28 "echo 0 > 
/proc/sys/kernel/hung_task_timeout_secs" disables this message.

2013-08-26 21:05:28 ceph-osd D 8160f760 0 8798 1 0x
2013-08-26 21:05:28 880bd998b740 0086 
880bd998bfd8 00012c80
2013-08-26 21:05:28 880bd998a010 00012c80 
00012c80 00012c80
2013-08-26 21:05:28 880bd998bfd8 00012c80 
880bd99c 880c48dd6380

2013-08-26 21:05:28 Call Trace:
2013-08-26 21:05:28 [] schedule+0x24/0x70
2013-08-26 21:05:28 [] 
rwsem_down_read_failed+0x9d/0xe5
2013-08-26 21:05:28 [] 
call_rwsem_down_read_failed+0x14/0x30

2013-08-26 21:05:28 [] ? down_read+0x12/0x20
2013-08-26 21:05:28 [] btree_read_async+0xa2/0x1a0 
[bcache]
2013-08-26 21:05:28 [] closure_queue+0x43/0x60 
[bcache]

2013-08-26 21:05:28 [] T.1050+0x63/0x70 [bcache]
2013-08-26 21:05:28 [] 
cached_dev_make_request+0x20a/0x350 [bcache]

2013-08-26 21:05:28 [] generic_make_request+0xc2/0x100
2013-08-26 21:05:28 [] submit_bio+0x67/0x130
2013-08-26 21:05:28 [] do_mpage_readpage+0x2a0/0x660
2013-08-26 21:05:28 [] ? 
__inc_zone_page_state+0x2e/0x30
2013-08-26 21:05:28 [] ? 
add_to_page_cache_locked+0x81/0x110

2013-08-26 21:05:28 [] mpage_readpages+0xfa/0x150
2013-08-26 21:05:28 [] ? 
xfs_get_blocks_direct+0x20/0x20
2013-08-26 21:05:28 [] ? 
xfs_get_blocks_direct+0x20/0x20
2013-08-26 21:05:28 [] ? 
__alloc_pages_nodemask+0x123/0x240

2013-08-26 21:05:27 [] ? __close_fd+0x43/0x80
2013-08-26 21:05:27 [] SyS_fdatasync+0xe/0x20
2013-08-26 21:05:27 [] system_call_fastpath+0x16/0x1b
2013-08-26 21:05:27 INFO: task ceph-osd:8746 blocked for more than 
120 seconds.
2013-08-26 21:05:27 "echo 0 > 
/proc/sys/kernel/hung_task_timeout_secs" disables this message.

2013-08-26 21:05:27 ceph-osd D 8160f760 0 8746 1 0x
2013-08-26 21:05:27 880c2dde1d38 0086 
880c2dde1fd8 00012c80
2013-08-26 21:05:27 880c2dde0010 00012c80 
00012c80 00012c80
2013-08-26 21:05:27 880c2dde1fd8 00012c80 
880c239eb1c0 880c48dd6380

2013-08-26 21:05:27 Call Trace:
2013-08-26 21:05:27 [] schedule+0x24/0x70
2013-08-26 21:05:27 [] schedule_timeout+0x16d/0x200
2013-08-26 21:05:27 [] ? 
__queue_delayed_work+0xaa/0x1a0
2013-08-26 21:05:27 [] ? 
try_to_grab_pending+0x109/0x190

2013-08-26 21:05:27 [] wait_for_completion+0x95/0x110
2013-08-26 21:05:27 [] ? try_to_wake_up+0x2a0/0x2a0
2013-08-26 21:05:27 [] ? bdi_queue_work+0x77/0xc0
2013-08-26 21:05:27 [] 
writeback_inodes_sb_nr+0x83/0xb0

2013-08-26 21:05:27 [] writeback_inodes_sb+0x5a/0x70
2013-08-26 21:05:27 [] __sync_filesystem+0x4a/0x50
2013-08-26 21:05:27 [] sync_filesystem+0x32/0x60
2013-08-26 21:05:27 [] SyS_syncfs+0x50/0x90
2013-08-26 21:05:27 [] system_call_fastpath+0x16/0x1b
2013-08-26 21:05:27 INFO: task ceph-osd:8798 blocked for more than 
120

Re: bcache: Fix a writeback performance regression

2013-08-22 Thread Kent Overstreet
On Thu, Aug 22, 2013 at 09:32:52AM +0200, Stefan Priebe - Profihost AG wrote:
> great!
> 
> Everything seems to work fine now! Except read_dirty always going to
> negative values after a reboot.

That one's fixed in 3.11 :)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-22 Thread Stefan Priebe - Profihost AG
great!

Everything seems to work fine now! Except read_dirty always going to
negative values after a reboot.

Stefan

Am 22.08.2013 08:02, schrieb Kent Overstreet:
> On Thu, Aug 22, 2013 at 07:59:04AM +0200, Stefan Priebe wrote:
>>
>>> schedule_timeout() is not the same as
>>> schedule_timeout_interruptible().
>>
>> just search and replace? So i can try on my own.
> 
> The one in read_dirty(), line ~330
> --
> To unsubscribe from this list: send the line "unsubscribe linux-bcache" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-21 Thread Kent Overstreet
On Thu, Aug 22, 2013 at 07:59:04AM +0200, Stefan Priebe wrote:
> 
> >schedule_timeout() is not the same as
> >schedule_timeout_interruptible().
> 
> just search and replace? So i can try on my own.

The one in read_dirty(), line ~330
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-21 Thread Stefan Priebe
schedule+0x24/0x70
2013-08-22 06:28:43 []
schedule_preempt_disabled+0x9/0x10
2013-08-22 06:28:43 []
__mutex_lock_slowpath+0x194/0x240
2013-08-22 06:28:43 [] mutex_lock+0x1e/0x40
2013-08-22 06:28:43 []
bch_bucket_alloc_set+0x3b/0x80 [bcache]
2013-08-22 06:28:43 []
bch_alloc_sectors+0x12c/0x4e0 [bcache]
2013-08-22 06:28:43 [] ?
bch_generic_make_request_hack+0x25/0xc0 [bcache]
2013-08-22 06:28:43 [] ?
__bch_submit_bbio+0x73/0x80 [bcache]
2013-08-22 06:28:43 []
bch_insert_data_loop+0xf8/0x610 [bcache]
2013-08-22 06:28:43 [] ?
bch_get_congested+0x25/0x70 [bcache]
2013-08-22 06:28:43 []
bch_insert_data+0x1d/0x20 [bcache]
2013-08-22 06:28:43 [] closure_queue+0x43/0x60
[bcache]
2013-08-22 06:28:43 []
request_write+0x19e/0x380 [bcache]

Am 22.08.2013 01:47, schrieb Kent Overstreet:

On Tue, Aug 20, 2013 at 10:07:45AM +0200, Stefan Priebe - Profihost AG wrote:

Am 20.08.2013 10:01, schrieb Stefan Priebe - Profihost AG:

Am 20.08.2013 00:27, schrieb Kent Overstreet:

On Mon, Aug 19, 2013 at 12:09:24AM +0200, Stefan Priebe wrote:


Vanilla 3.10.7 + bcache: Fix a writeback performance regression

http://pastebin.com/raw.php?i=LXZk4cMH


Whoops, at first I thought this was the same bug as one I'd already been
chasing down that had been a harmless bug - turns out I didn't look
closely enough at the backtrace.

What happened is background writeback is deadlocking, because for some
reason the workqueue it's running out of is a singlethreaded workqueue,
so as soon as it decides to queue enough writeback bios that it has to
sleep on that semaphore (which often won't happen due to the PD
controller based ratelimiting) - boom, deadlock.

Here's the fixup patch I just tested and am applying:


Oh i'm now seeing very high CPU spikes of kworker... i don't see if i
remove bcache: Fix a writeback performance regression.


Are you able to reproduce it? I'm not having any luck reproducing it...
--
To unsubscribe from this list: send the line "unsubscribe linux-bcache" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-bcache" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-21 Thread Kent Overstreet
 autoremove_wake_function+0x40/0x40
> 2013-08-22 06:28:43 [] ? submit_bh+0xb/0x10
> 2013-08-22 06:28:43 [] __wait_on_buffer+0x26/0x30
> 2013-08-22 06:28:43 [] __block_write_begin+0x383/0x530
> 2013-08-22 06:28:43 [] ?
> xfs_get_blocks_direct+0x20/0x20
> 2013-08-22 06:28:43 [] ?
> grab_cache_page_write_begin+0x9f/0xd0
> 2013-08-22 06:28:43 [] xfs_vm_write_begin+0x5f/0xd0
> 2013-08-22 06:28:43 []
> generic_perform_write+0xca/0x200
> 2013-08-22 06:28:43 []
> generic_file_buffered_write+0x5f/0x90
> 2013-08-22 06:28:43 []
> xfs_file_buffered_aio_write+0xf5/0x180
> 2013-08-22 06:28:43 [] xfs_file_aio_write+0xce/0x150
> 2013-08-22 06:28:43 [] ? path_put+0x1d/0x30
> 2013-08-22 06:28:43 [] do_sync_readv_writev+0x68/0xa0
> 2013-08-22 06:28:43 [] do_readv_writev+0xf2/0x2e0
> 2013-08-22 06:28:43 [] ?
> xfs_file_buffered_aio_write+0x180/0x180
> 2013-08-22 06:28:43 [] ?
> do_sync_readv_writev+0xa0/0xa0
> 2013-08-22 06:28:43 [] vfs_writev+0x3e/0x60
> 2013-08-22 06:28:43 [] SyS_writev+0x5a/0xc0
> 2013-08-22 06:28:43 [] ? SyS_lseek+0x53/0x80
> 2013-08-22 06:28:43 [] system_call_fastpath+0x16/0x1b
> 2013-08-22 06:28:43 INFO: task ceph-osd:24134 blocked for more
> than 120 seconds.
> 2013-08-22 06:28:43 "echo 0 >
> /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> 2013-08-22 06:28:43 ceph-osd D 8160f760 0 24134 1 0x0004
> 2013-08-22 06:28:43 880c23d31708 0082
> 880c23d31fd8 00012c80
> 2013-08-22 06:28:43 880c23d30010 00012c80
> 00012c80 00012c80
> 2013-08-22 06:28:43 880c23d31fd8 00012c80
> 880c361e 880c48dd4aa0
> 2013-08-22 06:28:43 Call Trace:
> 2013-08-22 06:28:43 [] schedule+0x24/0x70
> 2013-08-22 06:28:43 []
> schedule_preempt_disabled+0x9/0x10
> 2013-08-22 06:28:43 []
> __mutex_lock_slowpath+0x194/0x240
> 2013-08-22 06:28:43 [] mutex_lock+0x1e/0x40
> 2013-08-22 06:28:43 []
> bch_bucket_alloc_set+0x3b/0x80 [bcache]
> 2013-08-22 06:28:43 []
> bch_alloc_sectors+0x12c/0x4e0 [bcache]
> 2013-08-22 06:28:43 [] ?
> bch_generic_make_request_hack+0x25/0xc0 [bcache]
> 2013-08-22 06:28:43 [] ?
> __bch_submit_bbio+0x73/0x80 [bcache]
> 2013-08-22 06:28:43 []
> bch_insert_data_loop+0xf8/0x610 [bcache]
> 2013-08-22 06:28:43 [] ?
> bch_get_congested+0x25/0x70 [bcache]
> 2013-08-22 06:28:43 []
> bch_insert_data+0x1d/0x20 [bcache]
> 2013-08-22 06:28:43 [] closure_queue+0x43/0x60
> [bcache]
> 2013-08-22 06:28:43 []
> request_write+0x19e/0x380 [bcache]
> 
> Am 22.08.2013 01:47, schrieb Kent Overstreet:
> >On Tue, Aug 20, 2013 at 10:07:45AM +0200, Stefan Priebe - Profihost AG wrote:
> >>Am 20.08.2013 10:01, schrieb Stefan Priebe - Profihost AG:
> >>>Am 20.08.2013 00:27, schrieb Kent Overstreet:
> >>>>On Mon, Aug 19, 2013 at 12:09:24AM +0200, Stefan Priebe wrote:
> >>>>>
> >>>>>Vanilla 3.10.7 + bcache: Fix a writeback performance regression
> >>>>>
> >>>>>http://pastebin.com/raw.php?i=LXZk4cMH
> >>>>
> >>>>Whoops, at first I thought this was the same bug as one I'd already been
> >>>>chasing down that had been a harmless bug - turns out I didn't look
> >>>>closely enough at the backtrace.
> >>>>
> >>>>What happened is background writeback is deadlocking, because for some
> >>>>reason the workqueue it's running out of is a singlethreaded workqueue,
> >>>>so as soon as it decides to queue enough writeback bios that it has to
> >>>>sleep on that semaphore (which often won't happen due to the PD
> >>>>controller based ratelimiting) - boom, deadlock.
> >>>>
> >>>>Here's the fixup patch I just tested and am applying:
> >>
> >>Oh i'm now seeing very high CPU spikes of kworker... i don't see if i
> >>remove bcache: Fix a writeback performance regression.
> >
> >Are you able to reproduce it? I'm not having any luck reproducing it...
> >--
> >To unsubscribe from this list: send the line "unsubscribe linux-bcache" in
> >the body of a message to majord...@vger.kernel.org
> >More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-21 Thread Stefan Priebe
eneric_make_request_hack+0x25/0xc0 [bcache]
2013-08-22 06:28:43 [] ? 
__bch_submit_bbio+0x73/0x80 [bcache]
2013-08-22 06:28:43 [] 
bch_insert_data_loop+0xf8/0x610 [bcache]
2013-08-22 06:28:43 [] ? 
bch_get_congested+0x25/0x70 [bcache]
2013-08-22 06:28:43 [] bch_insert_data+0x1d/0x20 
[bcache]
2013-08-22 06:28:43 [] closure_queue+0x43/0x60 
[bcache]
2013-08-22 06:28:43 [] request_write+0x19e/0x380 
[bcache]


Am 22.08.2013 01:47, schrieb Kent Overstreet:

On Tue, Aug 20, 2013 at 10:07:45AM +0200, Stefan Priebe - Profihost AG wrote:

Am 20.08.2013 10:01, schrieb Stefan Priebe - Profihost AG:

Am 20.08.2013 00:27, schrieb Kent Overstreet:

On Mon, Aug 19, 2013 at 12:09:24AM +0200, Stefan Priebe wrote:


Vanilla 3.10.7 + bcache: Fix a writeback performance regression

http://pastebin.com/raw.php?i=LXZk4cMH


Whoops, at first I thought this was the same bug as one I'd already been
chasing down that had been a harmless bug - turns out I didn't look
closely enough at the backtrace.

What happened is background writeback is deadlocking, because for some
reason the workqueue it's running out of is a singlethreaded workqueue,
so as soon as it decides to queue enough writeback bios that it has to
sleep on that semaphore (which often won't happen due to the PD
controller based ratelimiting) - boom, deadlock.

Here's the fixup patch I just tested and am applying:


Oh i'm now seeing very high CPU spikes of kworker... i don't see if i
remove bcache: Fix a writeback performance regression.


Are you able to reproduce it? I'm not having any luck reproducing it...
--
To unsubscribe from this list: send the line "unsubscribe linux-bcache" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-21 Thread Stefan Priebe

Am 22.08.2013 01:47, schrieb Kent Overstreet:

On Tue, Aug 20, 2013 at 10:07:45AM +0200, Stefan Priebe - Profihost AG wrote:

Am 20.08.2013 10:01, schrieb Stefan Priebe - Profihost AG:

Am 20.08.2013 00:27, schrieb Kent Overstreet:

On Mon, Aug 19, 2013 at 12:09:24AM +0200, Stefan Priebe wrote:


Vanilla 3.10.7 + bcache: Fix a writeback performance regression

http://pastebin.com/raw.php?i=LXZk4cMH


Whoops, at first I thought this was the same bug as one I'd already been
chasing down that had been a harmless bug - turns out I didn't look
closely enough at the backtrace.

What happened is background writeback is deadlocking, because for some
reason the workqueue it's running out of is a singlethreaded workqueue,
so as soon as it decides to queue enough writeback bios that it has to
sleep on that semaphore (which often won't happen due to the PD
controller based ratelimiting) - boom, deadlock.

Here's the fixup patch I just tested and am applying:


Oh i'm now seeing very high CPU spikes of kworker... i don't see if i
remove bcache: Fix a writeback performance regression.


Are you able to reproduce it? I'm not having any luck reproducing it...


Hi,

yes but only on production system it seems it happens when there is a 
big bunch in dirty and having heavy 4K random I/O.


Stefan
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-21 Thread Kent Overstreet
On Tue, Aug 20, 2013 at 10:07:45AM +0200, Stefan Priebe - Profihost AG wrote:
> Am 20.08.2013 10:01, schrieb Stefan Priebe - Profihost AG:
> > Am 20.08.2013 00:27, schrieb Kent Overstreet:
> >> On Mon, Aug 19, 2013 at 12:09:24AM +0200, Stefan Priebe wrote:
> >>>
> >>> Vanilla 3.10.7 + bcache: Fix a writeback performance regression
> >>>
> >>> http://pastebin.com/raw.php?i=LXZk4cMH
> >>
> >> Whoops, at first I thought this was the same bug as one I'd already been
> >> chasing down that had been a harmless bug - turns out I didn't look
> >> closely enough at the backtrace.
> >>
> >> What happened is background writeback is deadlocking, because for some
> >> reason the workqueue it's running out of is a singlethreaded workqueue,
> >> so as soon as it decides to queue enough writeback bios that it has to
> >> sleep on that semaphore (which often won't happen due to the PD
> >> controller based ratelimiting) - boom, deadlock.
> >>
> >> Here's the fixup patch I just tested and am applying:
> 
> Oh i'm now seeing very high CPU spikes of kworker... i don't see if i
> remove bcache: Fix a writeback performance regression.

Are you able to reproduce it? I'm not having any luck reproducing it...
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-21 Thread Kent Overstreet
On Tue, Aug 20, 2013 at 10:07:45AM +0200, Stefan Priebe - Profihost AG wrote:
> Am 20.08.2013 10:01, schrieb Stefan Priebe - Profihost AG:
> > Am 20.08.2013 00:27, schrieb Kent Overstreet:
> >> On Mon, Aug 19, 2013 at 12:09:24AM +0200, Stefan Priebe wrote:
> >>>
> >>> Vanilla 3.10.7 + bcache: Fix a writeback performance regression
> >>>
> >>> http://pastebin.com/raw.php?i=LXZk4cMH
> >>
> >> Whoops, at first I thought this was the same bug as one I'd already been
> >> chasing down that had been a harmless bug - turns out I didn't look
> >> closely enough at the backtrace.
> >>
> >> What happened is background writeback is deadlocking, because for some
> >> reason the workqueue it's running out of is a singlethreaded workqueue,
> >> so as soon as it decides to queue enough writeback bios that it has to
> >> sleep on that semaphore (which often won't happen due to the PD
> >> controller based ratelimiting) - boom, deadlock.
> >>
> >> Here's the fixup patch I just tested and am applying:
> 
> Oh i'm now seeing very high CPU spikes of kworker... i don't see if i
> remove bcache: Fix a writeback performance regression.

*swears*

I just saw that this morning, but I assumed it was a bug in my stripe
aware scan code that isn't in this branch yet - thanks for letting me
know that's not the case.

I shall work on a fix for the fix...
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-20 Thread Stefan Priebe - Profihost AG
Am 20.08.2013 10:01, schrieb Stefan Priebe - Profihost AG:
> Am 20.08.2013 00:27, schrieb Kent Overstreet:
>> On Mon, Aug 19, 2013 at 12:09:24AM +0200, Stefan Priebe wrote:
>>>
>>> Vanilla 3.10.7 + bcache: Fix a writeback performance regression
>>>
>>> http://pastebin.com/raw.php?i=LXZk4cMH
>>
>> Whoops, at first I thought this was the same bug as one I'd already been
>> chasing down that had been a harmless bug - turns out I didn't look
>> closely enough at the backtrace.
>>
>> What happened is background writeback is deadlocking, because for some
>> reason the workqueue it's running out of is a singlethreaded workqueue,
>> so as soon as it decides to queue enough writeback bios that it has to
>> sleep on that semaphore (which often won't happen due to the PD
>> controller based ratelimiting) - boom, deadlock.
>>
>> Here's the fixup patch I just tested and am applying:

Oh i'm now seeing very high CPU spikes of kworker... i don't see if i
remove bcache: Fix a writeback performance regression.

Stefan
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-20 Thread Stefan Priebe - Profihost AG
Am 20.08.2013 00:27, schrieb Kent Overstreet:
> On Mon, Aug 19, 2013 at 12:09:24AM +0200, Stefan Priebe wrote:
>>
>> Vanilla 3.10.7 + bcache: Fix a writeback performance regression
>>
>> http://pastebin.com/raw.php?i=LXZk4cMH
> 
> Whoops, at first I thought this was the same bug as one I'd already been
> chasing down that had been a harmless bug - turns out I didn't look
> closely enough at the backtrace.
> 
> What happened is background writeback is deadlocking, because for some
> reason the workqueue it's running out of is a singlethreaded workqueue,
> so as soon as it decides to queue enough writeback bios that it has to
> sleep on that semaphore (which often won't happen due to the PD
> controller based ratelimiting) - boom, deadlock.
> 
> Here's the fixup patch I just tested and am applying:

thanks, this works fine. The only issue i still have, is that the
dirty_date value starts at 0 and goes to a negative value after reboot.

Greets,
Stefan
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-19 Thread Kent Overstreet
On Mon, Aug 19, 2013 at 12:09:24AM +0200, Stefan Priebe wrote:
> 
> Vanilla 3.10.7 + bcache: Fix a writeback performance regression
> 
> http://pastebin.com/raw.php?i=LXZk4cMH

Whoops, at first I thought this was the same bug as one I'd already been
chasing down that had been a harmless bug - turns out I didn't look
closely enough at the backtrace.

What happened is background writeback is deadlocking, because for some
reason the workqueue it's running out of is a singlethreaded workqueue,
so as soon as it decides to queue enough writeback bios that it has to
sleep on that semaphore (which often won't happen due to the PD
controller based ratelimiting) - boom, deadlock.

Here's the fixup patch I just tested and am applying:


>From 0af68de350e05e43fd093b36dcb0fe8aa838fabf Mon Sep 17 00:00:00 2001
From: Kent Overstreet 
Date: Mon, 19 Aug 2013 15:26:22 -0700
Subject: [PATCH] bcache: Fix a writeback deadlock

Signed-off-by: Kent Overstreet 

diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f88c62e..27ac519 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -351,7 +351,7 @@ static void write_dirty(struct closure *cl)
 
closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-   continue_at(cl, write_dirty_finish, dirty_wq);
+   continue_at(cl, write_dirty_finish, system_wq);
 }
 
 static void read_dirty_endio(struct bio *bio, int error)
@@ -371,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)
 
closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-   continue_at(cl, write_dirty, dirty_wq);
+   continue_at(cl, write_dirty, system_wq);
 }
 
 static void read_dirty(struct closure *cl)
@@ -512,7 +512,7 @@ void bch_writeback_exit(void)
 
 int __init bch_writeback_init(void)
 {
-   dirty_wq = create_singlethread_workqueue("bcache_writeback");
+   dirty_wq = create_workqueue("bcache_writeback");
if (!dirty_wq)
return -ENOMEM;
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bcache: Fix a writeback performance regression

2013-08-18 Thread Stefan Priebe


Vanilla 3.10.7 + bcache: Fix a writeback performance regression

http://pastebin.com/raw.php?i=LXZk4cMH

Stefan

Am 16.08.2013 12:11, schrieb Stefan Priebe - Profihost AG:

Hi,

bcache: Fix a writeback performance regression

this one results in 3.10 into hung tasks in bcache_writeback read_dirty.

Stefan
Am 15.08.2013 08:43, schrieb Stefan Priebe - Profihost AG:

Am 15.08.2013 00:59, schrieb Kent Overstreet:

Jens, here's the latest bcache fixes. Some urgent stuff in here:


The following changes since commit 79826c35eb99cd3c0873b8396f45fa26c87fb0b0:

   bcache: Allocation kthread fixes (2013-07-12 00:22:49 -0700)

are available in the git repository at:

   git://evilpiepirate.org/~kent/linux-bcache.git bcache-for-3.11

for you to fetch changes up to 0434a516e99ee51ac2d0dfa71b2f56c89ac5db05:

   bcache: Fix a flush/fua performance bug (2013-08-14 15:44:51 -0700)


Gabriel de Perthuis (1):
   bcache: Strip endline when writing the label through sysfs

Geert Uytterhoeven (1):
   bcache: Correct printf()-style format length modifier

Kent Overstreet (4):
   bcache: Fix a dumb journal discard bug
   bcache: Fix for when no journal entries are found
   bcache: Fix a writeback performance regression
   bcache: Fix a flush/fua performance bug

  drivers/md/bcache/bcache.h|  7 +++
  drivers/md/bcache/btree.c |  2 +-
  drivers/md/bcache/journal.c   | 33 -
  drivers/md/bcache/sysfs.c |  9 +++--
  drivers/md/bcache/util.c  | 11 ++-
  drivers/md/bcache/util.h  | 12 +---
  drivers/md/bcache/writeback.c | 37 ++---
  7 files changed, 68 insertions(+), 43 deletions(-)
--


As 3.10 is long term stable release. You might need CC the stable list
for patches which have to go to 3.10 as well.

At least this one should go to 3.10:

Gabriel de Perthuis (1):
   bcache: Strip endline when writing the label through sysfs


Thanks!

Greets,
Stefan


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/