Re: [PATCH 10/12] Limit bio_endio recursion
On Thu, Apr 7, 2016 at 1:44 PM, Ming Lei wrote: > On Thu, 7 Apr 2016 11:54:49 +0800 > Ming Lei wrote: > > @@ -1737,6 +1739,46 @@ static inline bool bio_remaining_done(struct bio *bio) > return false; > } > > +/* disable local irq when manipulating the percpu bio_list */ > +static void unwind_bio_endio(struct bio *bio) > +{ > + struct bio_list bl_in_stack; > + struct bio_list *bl; > + unsigned long flags; > + bool clear_list = false; > + > + local_irq_save(flags); > + > + bl = this_cpu_read(bio_end_list); > + if (!bl) { > + bl = &bl_in_stack; > + bio_list_init(bl); > + clear_list = true; oops, forget to write the pointer into the percpu bio_list pointer. But it is still working after I fix that by the following line: this_cpu_write(bio_end_list, bl); thanks, -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 10/12] Limit bio_endio recursion
On Thu, 7 Apr 2016 11:54:49 +0800 Ming Lei wrote: > On Mon, Apr 4, 2016 at 1:06 PM, Shaun Tancheff wrote: > > Recursive endio calls can exceed 16k stack. Tested with > > 32k stack and observed: > > > > DepthSize Location(293 entries) > > - > > 0)21520 16 __module_text_address+0x12/0x60 > > 1)21504 8 __module_address+0x5/0x140 > > 2)21496 24 __module_text_address+0x12/0x60 > > 3)21472 16 is_module_text_address+0xe/0x20 > > 4)21456 8 __kernel_text_address+0x50/0x80 > > 5)21448 136 print_context_stack+0x5a/0xf0 > > 6)21312 144 dump_trace+0x14c/0x300 > > 7)21168 8 save_stack_trace+0x2f/0x50 > > 8)21160 88 set_track+0x64/0x130 > > 9)21072 96 free_debug_processing+0x200/0x290 > > 10)20976 176 __slab_free+0x164/0x290 > > 11)20800 48 kmem_cache_free+0x1b0/0x1e0 > > 12)20752 16 mempool_free_slab+0x17/0x20 > > 13)20736 48 mempool_free+0x2f/0x90 > > 14)20688 16 bvec_free+0x36/0x40 > > 15)20672 32 bio_free+0x3b/0x60 > > 16)20640 16 bio_put+0x23/0x30 > > 17)20624 64 end_bio_extent_writepage+0xcf/0xe0 > > 18)20560 48 bio_endio+0x57/0x90 > > 19)20512 48 btrfs_end_bio+0xa8/0x160 > > 20)20464 48 bio_endio+0x57/0x90 > > 21)20416 112 dec_pending+0x121/0x270 > > 22)20304 64 clone_endio+0x7a/0x100 > > 23)20240 48 bio_endio+0x57/0x90 > > ... > > 277) 1264 64 clone_endio+0x7a/0x100 > > 278) 1200 48 bio_endio+0x57/0x90 > > 279) 1152 112 dec_pending+0x121/0x270 > > 280) 1040 64 clone_endio+0x7a/0x100 > > 281) 976 48 bio_endio+0x57/0x90 > > 282) 928 80 blk_update_request+0x8f/0x340 > > 283) 848 80 scsi_end_request+0x33/0x1c0 > > 284) 768 112 scsi_io_completion+0xb5/0x620 > > 285) 656 48 scsi_finish_command+0xcf/0x120 > > 286) 608 48 scsi_softirq_done+0x126/0x150 > > 287) 560 24 blk_done_softirq+0x78/0x90 > > 288) 536 136 __do_softirq+0xfd/0x280 > > 289) 400 16 run_ksoftirqd+0x28/0x50 > > 290) 384 64 smpboot_thread_fn+0x105/0x160 > > 291) 320 144 kthread+0xc9/0xe0 > > 292) 176 176 ret_from_fork+0x3f/0x70 > > > > Based on earlier patch by Mikulas Patocka . > > https://lkml.org/lkml/2008/6/24/18 > > Looks a empty link, and the following had the discussion too: > > http://linux-kernel.2935.n7.nabble.com/PATCH-1-2-Avoid-bio-endio-recursion-td306043.html > > > > > Signed-off-by: Shaun Tancheff > > --- > > block/bio.c | 57 ++--- > > 1 file changed, 54 insertions(+), 3 deletions(-) > > > > diff --git a/block/bio.c b/block/bio.c > > index d42e69c..4ac19f6 100644 > > --- a/block/bio.c > > +++ b/block/bio.c > > @@ -1733,6 +1733,59 @@ static inline bool bio_remaining_done(struct bio > > *bio) > > return false; > > } > > > > +static DEFINE_PER_CPU(struct bio **, bio_end_queue) = { NULL }; > > The idea looks very nice, but this patch can't be applid to current block-next > branch. > > Looks it might be simpler to implement the approach by using percpu bio_list. Cc Mikulas & Christoph How about the following implementation with bio_list? Looks more readable and simpler. Just run a recent testcase of bcache over raid1(virtio-blk, virtio-blk), looks it does work, :-) --- diff --git a/block/bio.c b/block/bio.c index f124a0a..b97dfe8 100644 --- a/block/bio.c +++ b/block/bio.c @@ -68,6 +68,8 @@ static DEFINE_MUTEX(bio_slab_lock); static struct bio_slab *bio_slabs; static unsigned int bio_slab_nr, bio_slab_max; +static DEFINE_PER_CPU(struct bio_list *, bio_end_list) = { NULL }; + static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) { unsigned int sz = sizeof(struct bio) + extra_size; @@ -1737,6 +1739,46 @@ static inline bool bio_remaining_done(struct bio *bio) return false; } +/* disable local irq when manipulating the percpu bio_list */ +static void unwind_bio_endio(struct bio *bio) +{ + struct bio_list bl_in_stack; + struct bio_list *bl; + unsigned long flags; + bool clear_list = false; + + local_irq_save(flags); + + bl = this_cpu_read(bio_end_list); + if (!bl) { + bl = &bl_in_stack; + bio_list_init(bl); + clear_list = true; + } + + if (!bio_list_empty(bl)) { + bio_list_add(bl, bio); + goto out; + } + + while (1) { + local_irq_restore(flags); + + if (!bio) +
Re: [PATCH 10/12] Limit bio_endio recursion
On Mon, Apr 4, 2016 at 1:06 PM, Shaun Tancheff wrote: > Recursive endio calls can exceed 16k stack. Tested with > 32k stack and observed: > > DepthSize Location(293 entries) > - > 0)21520 16 __module_text_address+0x12/0x60 > 1)21504 8 __module_address+0x5/0x140 > 2)21496 24 __module_text_address+0x12/0x60 > 3)21472 16 is_module_text_address+0xe/0x20 > 4)21456 8 __kernel_text_address+0x50/0x80 > 5)21448 136 print_context_stack+0x5a/0xf0 > 6)21312 144 dump_trace+0x14c/0x300 > 7)21168 8 save_stack_trace+0x2f/0x50 > 8)21160 88 set_track+0x64/0x130 > 9)21072 96 free_debug_processing+0x200/0x290 > 10)20976 176 __slab_free+0x164/0x290 > 11)20800 48 kmem_cache_free+0x1b0/0x1e0 > 12)20752 16 mempool_free_slab+0x17/0x20 > 13)20736 48 mempool_free+0x2f/0x90 > 14)20688 16 bvec_free+0x36/0x40 > 15)20672 32 bio_free+0x3b/0x60 > 16)20640 16 bio_put+0x23/0x30 > 17)20624 64 end_bio_extent_writepage+0xcf/0xe0 > 18)20560 48 bio_endio+0x57/0x90 > 19)20512 48 btrfs_end_bio+0xa8/0x160 > 20)20464 48 bio_endio+0x57/0x90 > 21)20416 112 dec_pending+0x121/0x270 > 22)20304 64 clone_endio+0x7a/0x100 > 23)20240 48 bio_endio+0x57/0x90 > ... > 277) 1264 64 clone_endio+0x7a/0x100 > 278) 1200 48 bio_endio+0x57/0x90 > 279) 1152 112 dec_pending+0x121/0x270 > 280) 1040 64 clone_endio+0x7a/0x100 > 281) 976 48 bio_endio+0x57/0x90 > 282) 928 80 blk_update_request+0x8f/0x340 > 283) 848 80 scsi_end_request+0x33/0x1c0 > 284) 768 112 scsi_io_completion+0xb5/0x620 > 285) 656 48 scsi_finish_command+0xcf/0x120 > 286) 608 48 scsi_softirq_done+0x126/0x150 > 287) 560 24 blk_done_softirq+0x78/0x90 > 288) 536 136 __do_softirq+0xfd/0x280 > 289) 400 16 run_ksoftirqd+0x28/0x50 > 290) 384 64 smpboot_thread_fn+0x105/0x160 > 291) 320 144 kthread+0xc9/0xe0 > 292) 176 176 ret_from_fork+0x3f/0x70 > > Based on earlier patch by Mikulas Patocka . > https://lkml.org/lkml/2008/6/24/18 Looks a empty link, and the following had the discussion too: http://linux-kernel.2935.n7.nabble.com/PATCH-1-2-Avoid-bio-endio-recursion-td306043.html > > Signed-off-by: Shaun Tancheff > --- > block/bio.c | 57 ++--- > 1 file changed, 54 insertions(+), 3 deletions(-) > > diff --git a/block/bio.c b/block/bio.c > index d42e69c..4ac19f6 100644 > --- a/block/bio.c > +++ b/block/bio.c > @@ -1733,6 +1733,59 @@ static inline bool bio_remaining_done(struct bio *bio) > return false; > } > > +static DEFINE_PER_CPU(struct bio **, bio_end_queue) = { NULL }; The idea looks very nice, but this patch can't be applid to current block-next branch. Looks it might be simpler to implement the approach by using percpu bio_list. > + > +static struct bio *unwind_bio_endio(struct bio *bio) > +{ > + struct bio ***bio_end_queue_ptr; > + struct bio *bio_queue; > + struct bio *chain_bio = NULL; > + int error = bio->bi_error; > + unsigned long flags; > + > + local_irq_save(flags); If we may resue current->bio_list for this purpose, disabling irq should have been avoided, but looks it is difficult to do in that way, maybe impossible. Also not realistic to introduce a new per-task variable. > + bio_end_queue_ptr = this_cpu_ptr(&bio_end_queue); > + > + if (*bio_end_queue_ptr) { > + **bio_end_queue_ptr = bio; > + *bio_end_queue_ptr = &bio->bi_next; > + bio->bi_next = NULL; > + } else { > + bio_queue = NULL; > + *bio_end_queue_ptr = &bio_queue; Suggest to comment that bio_queue is the bottom-most stack variable, otherwise looks a bit tricky to understand. > + > +next_bio: > + if (bio->bi_end_io == bio_chain_endio) { > + struct bio *parent = bio->bi_private; > + > + bio_put(bio); > + chain_bio = parent; > + goto out; > + } > + > + if (bio->bi_end_io) { > + if (!bio->bi_error) > + bio->bi_error = error; > + bio->bi_end_io(bio); > + } > + > + if (bio_queue) { > + bio = bio_queue; > + bio_queue = bio->bi_next; > + if (!bio_queue) > +