Re: [PATCH 10/12] Limit bio_endio recursion

2016-04-07 Thread Ming Lei
On Thu, Apr 7, 2016 at 1:44 PM, Ming Lei  wrote:
> On Thu, 7 Apr 2016 11:54:49 +0800
> Ming Lei  wrote:
>

> @@ -1737,6 +1739,46 @@ static inline bool bio_remaining_done(struct bio *bio)
> return false;
>  }
>
> +/* disable local irq when manipulating the percpu bio_list */
> +static void unwind_bio_endio(struct bio *bio)
> +{
> +   struct bio_list bl_in_stack;
> +   struct bio_list *bl;
> +   unsigned long flags;
> +   bool clear_list = false;
> +
> +   local_irq_save(flags);
> +
> +   bl = this_cpu_read(bio_end_list);
> +   if (!bl) {
> +   bl = _in_stack;
> +   bio_list_init(bl);
> +   clear_list = true;

oops, forget to write the pointer into the percpu bio_list pointer.
But it is still working after I fix that by the following line:

 this_cpu_write(bio_end_list, bl);

thanks,
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/12] Limit bio_endio recursion

2016-04-06 Thread Ming Lei
On Thu, 7 Apr 2016 11:54:49 +0800
Ming Lei  wrote:

> On Mon, Apr 4, 2016 at 1:06 PM, Shaun Tancheff  wrote:
> > Recursive endio calls can exceed 16k stack. Tested with
> > 32k stack and observed:
> >
> > DepthSize   Location(293 entries)
> > -   
> >   0)21520  16   __module_text_address+0x12/0x60
> >   1)21504   8   __module_address+0x5/0x140
> >   2)21496  24   __module_text_address+0x12/0x60
> >   3)21472  16   is_module_text_address+0xe/0x20
> >   4)21456   8   __kernel_text_address+0x50/0x80
> >   5)21448 136   print_context_stack+0x5a/0xf0
> >   6)21312 144   dump_trace+0x14c/0x300
> >   7)21168   8   save_stack_trace+0x2f/0x50
> >   8)21160  88   set_track+0x64/0x130
> >   9)21072  96   free_debug_processing+0x200/0x290
> >  10)20976 176   __slab_free+0x164/0x290
> >  11)20800  48   kmem_cache_free+0x1b0/0x1e0
> >  12)20752  16   mempool_free_slab+0x17/0x20
> >  13)20736  48   mempool_free+0x2f/0x90
> >  14)20688  16   bvec_free+0x36/0x40
> >  15)20672  32   bio_free+0x3b/0x60
> >  16)20640  16   bio_put+0x23/0x30
> >  17)20624  64   end_bio_extent_writepage+0xcf/0xe0
> >  18)20560  48   bio_endio+0x57/0x90
> >  19)20512  48   btrfs_end_bio+0xa8/0x160
> >  20)20464  48   bio_endio+0x57/0x90
> >  21)20416 112   dec_pending+0x121/0x270
> >  22)20304  64   clone_endio+0x7a/0x100
> >  23)20240  48   bio_endio+0x57/0x90
> > ...
> > 277) 1264  64   clone_endio+0x7a/0x100
> > 278) 1200  48   bio_endio+0x57/0x90
> > 279) 1152 112   dec_pending+0x121/0x270
> > 280) 1040  64   clone_endio+0x7a/0x100
> > 281)  976  48   bio_endio+0x57/0x90
> > 282)  928  80   blk_update_request+0x8f/0x340
> > 283)  848  80   scsi_end_request+0x33/0x1c0
> > 284)  768 112   scsi_io_completion+0xb5/0x620
> > 285)  656  48   scsi_finish_command+0xcf/0x120
> > 286)  608  48   scsi_softirq_done+0x126/0x150
> > 287)  560  24   blk_done_softirq+0x78/0x90
> > 288)  536 136   __do_softirq+0xfd/0x280
> > 289)  400  16   run_ksoftirqd+0x28/0x50
> > 290)  384  64   smpboot_thread_fn+0x105/0x160
> > 291)  320 144   kthread+0xc9/0xe0
> > 292)  176 176   ret_from_fork+0x3f/0x70
> >
> > Based on earlier patch by Mikulas Patocka .
> > https://lkml.org/lkml/2008/6/24/18
> 
> Looks a empty link, and the following had the discussion too:
> 
> http://linux-kernel.2935.n7.nabble.com/PATCH-1-2-Avoid-bio-endio-recursion-td306043.html
> 
> >
> > Signed-off-by: Shaun Tancheff 
> > ---
> >  block/bio.c | 57 ++---
> >  1 file changed, 54 insertions(+), 3 deletions(-)
> >
> > diff --git a/block/bio.c b/block/bio.c
> > index d42e69c..4ac19f6 100644
> > --- a/block/bio.c
> > +++ b/block/bio.c
> > @@ -1733,6 +1733,59 @@ static inline bool bio_remaining_done(struct bio 
> > *bio)
> > return false;
> >  }
> >
> > +static DEFINE_PER_CPU(struct bio **, bio_end_queue) = { NULL };
> 
> The idea looks very nice, but this patch can't be applid to current block-next
> branch.
> 
> Looks it might be simpler to implement the approach by using percpu bio_list.

Cc Mikulas & Christoph

How about the following implementation with bio_list? Looks more readable and
simpler.

Just run a recent testcase of bcache over raid1(virtio-blk, virtio-blk), looks
it does work, :-)

---

diff --git a/block/bio.c b/block/bio.c
index f124a0a..b97dfe8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -68,6 +68,8 @@ static DEFINE_MUTEX(bio_slab_lock);
 static struct bio_slab *bio_slabs;
 static unsigned int bio_slab_nr, bio_slab_max;
 
+static DEFINE_PER_CPU(struct bio_list *, bio_end_list) = { NULL };
+
 static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
 {
unsigned int sz = sizeof(struct bio) + extra_size;
@@ -1737,6 +1739,46 @@ static inline bool bio_remaining_done(struct bio *bio)
return false;
 }
 
+/* disable local irq when manipulating the percpu bio_list */
+static void unwind_bio_endio(struct bio *bio)
+{
+   struct bio_list bl_in_stack;
+   struct bio_list *bl;
+   unsigned long flags;
+   bool clear_list = false;
+
+   local_irq_save(flags);
+
+   bl = this_cpu_read(bio_end_list);
+   if (!bl) {
+   bl = _in_stack;
+   bio_list_init(bl);
+   clear_list = true;
+   }
+
+   if (!bio_list_empty(bl)) {
+   bio_list_add(bl, bio);
+   goto out;
+   }
+
+   while (1) {
+   

Re: [PATCH 10/12] Limit bio_endio recursion

2016-04-06 Thread Ming Lei
On Mon, Apr 4, 2016 at 1:06 PM, Shaun Tancheff  wrote:
> Recursive endio calls can exceed 16k stack. Tested with
> 32k stack and observed:
>
> DepthSize   Location(293 entries)
> -   
>   0)21520  16   __module_text_address+0x12/0x60
>   1)21504   8   __module_address+0x5/0x140
>   2)21496  24   __module_text_address+0x12/0x60
>   3)21472  16   is_module_text_address+0xe/0x20
>   4)21456   8   __kernel_text_address+0x50/0x80
>   5)21448 136   print_context_stack+0x5a/0xf0
>   6)21312 144   dump_trace+0x14c/0x300
>   7)21168   8   save_stack_trace+0x2f/0x50
>   8)21160  88   set_track+0x64/0x130
>   9)21072  96   free_debug_processing+0x200/0x290
>  10)20976 176   __slab_free+0x164/0x290
>  11)20800  48   kmem_cache_free+0x1b0/0x1e0
>  12)20752  16   mempool_free_slab+0x17/0x20
>  13)20736  48   mempool_free+0x2f/0x90
>  14)20688  16   bvec_free+0x36/0x40
>  15)20672  32   bio_free+0x3b/0x60
>  16)20640  16   bio_put+0x23/0x30
>  17)20624  64   end_bio_extent_writepage+0xcf/0xe0
>  18)20560  48   bio_endio+0x57/0x90
>  19)20512  48   btrfs_end_bio+0xa8/0x160
>  20)20464  48   bio_endio+0x57/0x90
>  21)20416 112   dec_pending+0x121/0x270
>  22)20304  64   clone_endio+0x7a/0x100
>  23)20240  48   bio_endio+0x57/0x90
> ...
> 277) 1264  64   clone_endio+0x7a/0x100
> 278) 1200  48   bio_endio+0x57/0x90
> 279) 1152 112   dec_pending+0x121/0x270
> 280) 1040  64   clone_endio+0x7a/0x100
> 281)  976  48   bio_endio+0x57/0x90
> 282)  928  80   blk_update_request+0x8f/0x340
> 283)  848  80   scsi_end_request+0x33/0x1c0
> 284)  768 112   scsi_io_completion+0xb5/0x620
> 285)  656  48   scsi_finish_command+0xcf/0x120
> 286)  608  48   scsi_softirq_done+0x126/0x150
> 287)  560  24   blk_done_softirq+0x78/0x90
> 288)  536 136   __do_softirq+0xfd/0x280
> 289)  400  16   run_ksoftirqd+0x28/0x50
> 290)  384  64   smpboot_thread_fn+0x105/0x160
> 291)  320 144   kthread+0xc9/0xe0
> 292)  176 176   ret_from_fork+0x3f/0x70
>
> Based on earlier patch by Mikulas Patocka .
> https://lkml.org/lkml/2008/6/24/18

Looks a empty link, and the following had the discussion too:

http://linux-kernel.2935.n7.nabble.com/PATCH-1-2-Avoid-bio-endio-recursion-td306043.html

>
> Signed-off-by: Shaun Tancheff 
> ---
>  block/bio.c | 57 ++---
>  1 file changed, 54 insertions(+), 3 deletions(-)
>
> diff --git a/block/bio.c b/block/bio.c
> index d42e69c..4ac19f6 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -1733,6 +1733,59 @@ static inline bool bio_remaining_done(struct bio *bio)
> return false;
>  }
>
> +static DEFINE_PER_CPU(struct bio **, bio_end_queue) = { NULL };

The idea looks very nice, but this patch can't be applid to current block-next
branch.

Looks it might be simpler to implement the approach by using percpu bio_list.

> +
> +static struct bio *unwind_bio_endio(struct bio *bio)
> +{
> +   struct bio ***bio_end_queue_ptr;
> +   struct bio *bio_queue;
> +   struct bio *chain_bio = NULL;
> +   int error = bio->bi_error;
> +   unsigned long flags;
> +
> +   local_irq_save(flags);

If we may resue current->bio_list for this purpose, disabling irq should have
been avoided, but looks it is difficult to do in that way, maybe impossible.
Also not realistic to introduce a new per-task variable.

> +   bio_end_queue_ptr = this_cpu_ptr(_end_queue);
> +
> +   if (*bio_end_queue_ptr) {
> +   **bio_end_queue_ptr = bio;
> +   *bio_end_queue_ptr = >bi_next;
> +   bio->bi_next = NULL;
> +   } else {
> +   bio_queue = NULL;
> +   *bio_end_queue_ptr = _queue;

Suggest to comment that bio_queue is the bottom-most stack variable,
otherwise looks a bit tricky to understand.

> +
> +next_bio:
> +   if (bio->bi_end_io == bio_chain_endio) {
> +   struct bio *parent = bio->bi_private;
> +
> +   bio_put(bio);
> +   chain_bio = parent;
> +   goto out;
> +   }
> +
> +   if (bio->bi_end_io) {
> +   if (!bio->bi_error)
> +   bio->bi_error = error;
> +   bio->bi_end_io(bio);
> +   }
> +
> +   if (bio_queue) {
> +   bio = bio_queue;
> +   bio_queue = bio->bi_next;
> +   

[PATCH 10/12] Limit bio_endio recursion

2016-04-03 Thread Shaun Tancheff
Recursive endio calls can exceed 16k stack. Tested with
32k stack and observed:

DepthSize   Location(293 entries)
-   
  0)21520  16   __module_text_address+0x12/0x60
  1)21504   8   __module_address+0x5/0x140
  2)21496  24   __module_text_address+0x12/0x60
  3)21472  16   is_module_text_address+0xe/0x20
  4)21456   8   __kernel_text_address+0x50/0x80
  5)21448 136   print_context_stack+0x5a/0xf0
  6)21312 144   dump_trace+0x14c/0x300
  7)21168   8   save_stack_trace+0x2f/0x50
  8)21160  88   set_track+0x64/0x130
  9)21072  96   free_debug_processing+0x200/0x290
 10)20976 176   __slab_free+0x164/0x290
 11)20800  48   kmem_cache_free+0x1b0/0x1e0
 12)20752  16   mempool_free_slab+0x17/0x20
 13)20736  48   mempool_free+0x2f/0x90
 14)20688  16   bvec_free+0x36/0x40
 15)20672  32   bio_free+0x3b/0x60
 16)20640  16   bio_put+0x23/0x30
 17)20624  64   end_bio_extent_writepage+0xcf/0xe0
 18)20560  48   bio_endio+0x57/0x90
 19)20512  48   btrfs_end_bio+0xa8/0x160
 20)20464  48   bio_endio+0x57/0x90
 21)20416 112   dec_pending+0x121/0x270
 22)20304  64   clone_endio+0x7a/0x100
 23)20240  48   bio_endio+0x57/0x90
...
277) 1264  64   clone_endio+0x7a/0x100
278) 1200  48   bio_endio+0x57/0x90
279) 1152 112   dec_pending+0x121/0x270
280) 1040  64   clone_endio+0x7a/0x100
281)  976  48   bio_endio+0x57/0x90
282)  928  80   blk_update_request+0x8f/0x340
283)  848  80   scsi_end_request+0x33/0x1c0
284)  768 112   scsi_io_completion+0xb5/0x620
285)  656  48   scsi_finish_command+0xcf/0x120
286)  608  48   scsi_softirq_done+0x126/0x150
287)  560  24   blk_done_softirq+0x78/0x90
288)  536 136   __do_softirq+0xfd/0x280
289)  400  16   run_ksoftirqd+0x28/0x50
290)  384  64   smpboot_thread_fn+0x105/0x160
291)  320 144   kthread+0xc9/0xe0
292)  176 176   ret_from_fork+0x3f/0x70

Based on earlier patch by Mikulas Patocka .
https://lkml.org/lkml/2008/6/24/18

Signed-off-by: Shaun Tancheff 
---
 block/bio.c | 57 ++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index d42e69c..4ac19f6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1733,6 +1733,59 @@ static inline bool bio_remaining_done(struct bio *bio)
return false;
 }
 
+static DEFINE_PER_CPU(struct bio **, bio_end_queue) = { NULL };
+
+static struct bio *unwind_bio_endio(struct bio *bio)
+{
+   struct bio ***bio_end_queue_ptr;
+   struct bio *bio_queue;
+   struct bio *chain_bio = NULL;
+   int error = bio->bi_error;
+   unsigned long flags;
+
+   local_irq_save(flags);
+   bio_end_queue_ptr = this_cpu_ptr(_end_queue);
+
+   if (*bio_end_queue_ptr) {
+   **bio_end_queue_ptr = bio;
+   *bio_end_queue_ptr = >bi_next;
+   bio->bi_next = NULL;
+   } else {
+   bio_queue = NULL;
+   *bio_end_queue_ptr = _queue;
+
+next_bio:
+   if (bio->bi_end_io == bio_chain_endio) {
+   struct bio *parent = bio->bi_private;
+
+   bio_put(bio);
+   chain_bio = parent;
+   goto out;
+   }
+
+   if (bio->bi_end_io) {
+   if (!bio->bi_error)
+   bio->bi_error = error;
+   bio->bi_end_io(bio);
+   }
+
+   if (bio_queue) {
+   bio = bio_queue;
+   bio_queue = bio->bi_next;
+   if (!bio_queue)
+   *bio_end_queue_ptr = _queue;
+   goto next_bio;
+   }
+   *bio_end_queue_ptr = NULL;
+   }
+
+out:
+
+   local_irq_restore(flags);
+
+   return chain_bio;
+}
+
 /**
  * bio_endio - end I/O on a bio
  * @bio:   bio
@@ -1762,9 +1815,7 @@ void bio_endio(struct bio *bio)
bio_put(bio);
bio = parent;
} else {
-   if (bio->bi_end_io)
-   bio->bi_end_io(bio);
-   bio = NULL;
+   bio = unwind_bio_endio(bio);
}
}
 }
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html