The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh8-4.18.0-240.1.1.vz8.5.51 ------> commit 72792315a15cec6a69d0f45724295dbc4ba05efa Author: Kirill Tkhai <ktk...@virtuozzo.com> Date: Fri Jul 2 22:48:01 2021 +0300
ploop: Split pios from kwork context ...instead of doing this in ploop_clone_and_map(). Otherwise, since dm-rq is not marked as BLK_MQ_F_BLOCKING, ploop_clone_and_map() may be called from atomic context, where the sleeping allocation in create_bvec_from_rq() is not allowed, as in the following stack: [ 325.472261] CT: 7d0ce903-d493-464f-8786-d57bebc77b37: stopped [ 329.991603] BUG: sleeping function called from invalid context at mm/slab.h:496 [ 329.995398] in_atomic(): 1, irqs_disabled(): 0, pid: 6647, name: jbd2/dm-50135-8 [ 329.999080] 1 lock held by jbd2/dm-50135-8/6647: [ 330.002290] #0: ffffffffa6269e60 (rcu_read_lock){....}, at: hctx_lock+0x6d/0x180 [ 330.006298] CPU: 2 PID: 6647 Comm: jbd2/dm-50135-8 ve: / Kdump: loaded Tainted: G W --------- - - 4.18.0-240.1.1.vz8.5.50+debug #1 5.50 [ 330.013953] Hardware name: Virtuozzo OpenStack Compute, BIOS 1.11.0-2.vz7.1 04/01/2014 [ 330.018055] Call Trace: [ 330.021059] dump_stack+0x9a/0xf0 [ 330.024068] ___might_sleep.cold.70+0x13d/0x178 [ 330.027531] slab_pre_alloc_hook+0x6a/0x90 [ 330.030763] __kmalloc+0x5c/0x320 [ 330.033687] ? create_bvec_from_rq+0x1ab/0x9f0 [ploop] [ 330.037158] create_bvec_from_rq+0x1ab/0x9f0 [ploop] [ 330.040452] ploop_clone_and_map+0x166/0x5c0 [ploop] [ 330.044382] dm_mq_queue_rq+0x358/0x1030 [dm_mod] [ 330.048139] ? dm_softirq_done+0x830/0x830 [dm_mod] [ 330.051668] ? __sbitmap_queue_get+0xb7/0x230 [ 330.054961] ? sched_clock+0x5/0x10 [ 330.057947] ? __blk_mq_get_driver_tag+0x193/0x730 [ 330.061721] blk_mq_dispatch_rq_list+0x287/0x1f40 [ 330.065562] ? elv_rb_del+0x3b/0x80 [ 330.069061] ? blk_mq_dequeue_from_ctx+0x500/0x500 [ 330.072840] ? dd_dispatch_request+0x20f/0x930 [ 330.076442] blk_mq_do_dispatch_sched+0x2d8/0x4c0 [ 330.080288] ? blk_mq_sched_free_hctx_data+0x1b0/0x1b0 [ 330.084154] ? trace_hardirqs_on+0x10/0x10 [ 330.087570] __blk_mq_sched_dispatch_requests+0x2fd/0x4c0 [ 330.091476] ? blk_mq_sched_restart+0x50/0x50 [ 330.095008] ? sched_clock+0x5/0x10 [ 330.098065] ? 
sched_clock_cpu+0x18/0x1e0 [ 330.101411] blk_mq_sched_dispatch_requests+0xae/0x100 [ 330.105135] __blk_mq_run_hw_queue+0x169/0x250 [ 330.108330] ? __blk_mq_requeue_request+0x640/0x640 [ 330.111691] ? lock_downgrade+0x6f0/0x6f0 [ 330.114688] ? lock_acquire+0x14f/0x3b0 [ 330.117756] ? hctx_lock+0x6d/0x180 [ 330.120819] __blk_mq_delay_run_hw_queue+0x35c/0x690 [ 330.124265] blk_mq_run_hw_queue+0x140/0x280 [ 330.127789] ? blk_mq_delay_run_hw_queues+0x130/0x130 [ 330.131495] blk_mq_sched_insert_requests+0x1bd/0x4d0 [ 330.134584] ? kvm_sched_clock_read+0x14/0x30 [ 330.137682] blk_mq_flush_plug_list+0x6f8/0xb50 [ 330.141069] ? blk_mq_insert_requests+0x5d0/0x5d0 [ 330.144501] ? kvm_sched_clock_read+0x14/0x30 [ 330.147826] ? sched_clock+0x5/0x10 [ 330.150736] ? sched_clock_cpu+0x18/0x1e0 [ 330.153734] blk_flush_plug_list+0x27a/0x410 [ 330.156834] ? blk_rq_bio_prep+0x370/0x370 [ 330.160029] ? do_raw_read_unlock+0x40/0x70 [ 330.163177] blk_finish_plug+0x47/0x8a [ 330.166206] jbd2_journal_commit_transaction+0x2fc7/0x67f0 [jbd2] [ 330.169917] ? journal_submit_commit_record+0xa10/0xa10 [jbd2] [ 330.174095] ? find_held_lock+0x3a/0x1c0 [ 330.177559] ? kvm_sched_clock_read+0x14/0x30 [ 330.180656] ? sched_clock+0x5/0x10 [ 330.183392] ? find_held_lock+0x3a/0x1c0 [ 330.186306] ? _raw_spin_unlock_irqrestore+0x46/0x60 [ 330.189533] ? trace_hardirqs_on_caller+0x39d/0x580 [ 330.192471] ? del_timer+0x100/0x100 [ 330.195073] kjournald2+0x1df/0x7a0 [jbd2] [ 330.197545] ? __bpf_trace_jbd2_end_commit+0x10/0x10 [jbd2] [ 330.200693] ? finish_wait+0x280/0x280 [ 330.203351] ? __kthread_parkme+0xb6/0x180 [ 330.206086] ? __bpf_trace_jbd2_end_commit+0x10/0x10 [jbd2] [ 330.209207] kthread+0x30e/0x3d0 [ 330.211609] ? 
kthread_create_fn+0x70/0x70 [ 330.214183] ret_from_fork+0x3a/0x50 https://jira.sw.ru/browse/PSBM-131208 Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> ========================== ploop: Split pios in kwork https://jira.sw.ru/browse/PSBM-131208 Kirill Tkhai (10): ploop: Remove debug noinline in create_bvec_from_rq() ploop: Manage flush pios in generic way ploop: Teach dispatch_pios() work with flush pios ploop: Make split_pios_to_list() to add initial pio to the list too ploop: Introduce embedded_pio_to_prq() ploop: Introduce ploop_prq_valid() ploop: Move create_bvec_from_rq() up ploop: Split pios from kwork context ploop: Add sanity check of passed BAT from disk ploop: Reread file size after index update --- drivers/md/dm-ploop-cmd.c | 2 +- drivers/md/dm-ploop-map.c | 157 +++++++++++++++++++++++----------------------- drivers/md/dm-ploop.h | 5 +- 3 files changed, 84 insertions(+), 80 deletions(-) diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c index 5104bdff10f8..87517ed7ef1a 100644 --- a/drivers/md/dm-ploop-cmd.c +++ b/drivers/md/dm-ploop-cmd.c @@ -139,7 +139,7 @@ static void ploop_resume_submitting_pios(struct ploop *ploop) list_splice_tail_init(&ploop->suspended_pios, &list); spin_unlock_irq(&ploop->deferred_lock); - submit_pios(ploop, &list); + dispatch_pios(ploop, NULL, &list); } /* Find existing BAT clu pointing to dst_clu */ diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c index 7f467db8d6b1..ca7f841cb8b8 100644 --- a/drivers/md/dm-ploop-map.c +++ b/drivers/md/dm-ploop-map.c @@ -1559,6 +1559,59 @@ static struct bio_vec *create_bvec_from_rq(struct request *rq) return bvec; } +static void prepare_one_embedded_pio(struct ploop *ploop, struct pio *pio, + struct list_head *deferred_pios) +{ + struct ploop_rq *prq = embedded_pio_to_prq(pio); + struct request *rq = prq->rq; + struct bio_vec *bvec = NULL; + LIST_HEAD(list); + int ret; + + if (rq->bio != rq->biotail) { + if (req_op(rq) == REQ_OP_DISCARD) + goto skip_bvec; + /* 
+ * Transform a set of bvec arrays related to bios + * into a single bvec array (which we can iterate). + */ + bvec = create_bvec_from_rq(rq); + if (!bvec) + goto err_nomem; + prq->bvec = bvec; +skip_bvec: + pio->bi_iter.bi_sector = blk_rq_pos(rq); + pio->bi_iter.bi_size = blk_rq_bytes(rq); + pio->bi_iter.bi_idx = 0; + pio->bi_iter.bi_bvec_done = 0; + } else { + /* Single bio already provides bvec array */ + bvec = rq->bio->bi_io_vec; + + pio->bi_iter = rq->bio->bi_iter; + } + pio->bi_io_vec = bvec; + + pio->queue_list_id = PLOOP_LIST_DEFERRED; + ret = split_pio_to_list(ploop, pio, deferred_pios); + if (ret) + goto err_nomem; + + return; +err_nomem: + pio->bi_status = BLK_STS_IOERR; + pio_endio(pio); +} + +static void prepare_embedded_pios(struct ploop *ploop, struct list_head *pios, + struct list_head *deferred_pios) +{ + struct pio *pio; + + while ((pio = pio_list_pop(pios)) != NULL) + prepare_one_embedded_pio(ploop, pio, deferred_pios); +} + static void process_deferred_pios(struct ploop *ploop, struct list_head *pios) { struct pio *pio; @@ -1662,6 +1715,7 @@ static void submit_metadata_writeback(struct ploop *ploop) void do_ploop_work(struct work_struct *ws) { struct ploop *ploop = container_of(ws, struct ploop, worker); + LIST_HEAD(embedded_pios); LIST_HEAD(deferred_pios); LIST_HEAD(discard_pios); LIST_HEAD(cow_pios); @@ -1671,12 +1725,15 @@ void do_ploop_work(struct work_struct *ws) current->flags |= PF_IO_THREAD; spin_lock_irq(&ploop->deferred_lock); - list_splice_init(&ploop->resubmit_pios, &resubmit_pios); + list_splice_init(&ploop->pios[PLOOP_LIST_PREPARE], &embedded_pios); list_splice_init(&ploop->pios[PLOOP_LIST_DEFERRED], &deferred_pios); list_splice_init(&ploop->pios[PLOOP_LIST_DISCARD], &discard_pios); list_splice_init(&ploop->pios[PLOOP_LIST_COW], &cow_pios); + list_splice_init(&ploop->resubmit_pios, &resubmit_pios); spin_unlock_irq(&ploop->deferred_lock); + prepare_embedded_pios(ploop, &embedded_pios, &deferred_pios); + process_resubmit_pios(ploop, 
&resubmit_pios); process_deferred_pios(ploop, &deferred_pios); process_discard_pios(ploop, &discard_pios); @@ -1715,107 +1772,53 @@ static void init_prq(struct ploop_rq *prq, struct request *rq) prq->bvec = NULL; } -static void submit_pio(struct ploop *ploop, struct pio *pio) +int ploop_clone_and_map(struct dm_target *ti, struct request *rq, + union map_info *info, struct request **clone) { - struct list_head *queue_list; + struct ploop *ploop = ti->private; struct work_struct *worker; + struct ploop_rq *prq; unsigned long flags; bool queue = true; - LIST_HEAD(list); - int ret; + struct pio *pio; - if (pio->bi_iter.bi_size) { - queue_list = &ploop->pios[PLOOP_LIST_DEFERRED]; - worker = &ploop->worker; + prq = map_info_to_embedded_prq(info); + init_prq(prq, rq); - ret = split_pio_to_list(ploop, pio, &list); - if (ret) { - pio->bi_status = BLK_STS_RESOURCE; - goto endio; - } + pio = map_info_to_embedded_pio(info); + init_pio(ploop, req_op(rq), pio); + pio->endio_cb = prq_endio; + pio->endio_cb_data = prq; + + if (blk_rq_bytes(rq)) { + if (ploop_prq_valid(ploop, prq) < 0) + return DM_MAPIO_KILL; + + pio->queue_list_id = PLOOP_LIST_PREPARE; + worker = &ploop->worker; } else { - queue_list = &ploop->pios[PLOOP_LIST_FLUSH]; + pio->queue_list_id = PLOOP_LIST_FLUSH; worker = &ploop->fsync_worker; if (WARN_ON_ONCE(pio->bi_op != REQ_OP_FLUSH)) - goto kill; - list_add_tail(&pio->list, &list); + return DM_MAPIO_KILL; } spin_lock_irqsave(&ploop->deferred_lock, flags); if (unlikely(ploop->stop_submitting_pios)) { - list_splice_tail(&list, &ploop->suspended_pios); + list_add_tail(&pio->list, &ploop->suspended_pios); queue = false; goto unlock; } inc_nr_inflight(ploop, pio); - list_splice_tail(&list, queue_list); + list_add_tail(&pio->list, &ploop->pios[pio->queue_list_id]); unlock: spin_unlock_irqrestore(&ploop->deferred_lock, flags); if (queue) queue_work(ploop->wq, worker); - return; -kill: - pio->bi_status = BLK_STS_IOERR; -endio: - pio_endio(pio); -} - -void 
submit_pios(struct ploop *ploop, struct list_head *list) -{ - struct pio *pio; - - while ((pio = pio_list_pop(list)) != NULL) - submit_pio(ploop, pio); -} - -int ploop_clone_and_map(struct dm_target *ti, struct request *rq, - union map_info *info, struct request **clone) -{ - struct ploop *ploop = ti->private; - struct bio_vec *bvec = NULL; - struct ploop_rq *prq; - struct pio *pio; - - prq = map_info_to_embedded_prq(info); - init_prq(prq, rq); - - if (ploop_prq_valid(ploop, prq) < 0) - return DM_MAPIO_KILL; - - pio = map_info_to_embedded_pio(info); /* Embedded pio */ - init_pio(ploop, req_op(rq), pio); - - if (rq->bio != rq->biotail) { - if (req_op(rq) == REQ_OP_DISCARD) - goto skip_bvec; - /* - * Transform a set of bvec arrays related to bios - * into a single bvec array (which we can iterate). - */ - bvec = create_bvec_from_rq(rq); - if (!bvec) - return DM_MAPIO_KILL; - prq->bvec = bvec; -skip_bvec: - pio->bi_iter.bi_sector = blk_rq_pos(rq); - pio->bi_iter.bi_size = blk_rq_bytes(rq); - pio->bi_iter.bi_idx = 0; - pio->bi_iter.bi_bvec_done = 0; - } else if (rq->bio) { - /* Single bio already provides bvec array */ - bvec = rq->bio->bi_io_vec; - - pio->bi_iter = rq->bio->bi_iter; - } /* else FLUSH */ - - pio->bi_io_vec = bvec; - pio->endio_cb = prq_endio; - pio->endio_cb_data = prq; - submit_pio(ploop, pio); return DM_MAPIO_SUBMITTED; } diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h index 26eab969c389..d26f269e475c 100644 --- a/drivers/md/dm-ploop.h +++ b/drivers/md/dm-ploop.h @@ -123,7 +123,9 @@ struct md_page { }; enum { - PLOOP_LIST_DEFERRED = 0, + PLOOP_LIST_PREPARE = 0, /* List for initial preparation and splitting + * embedded pios related to prq */ + PLOOP_LIST_DEFERRED, PLOOP_LIST_FLUSH, PLOOP_LIST_DISCARD, PLOOP_LIST_COW, @@ -530,7 +532,6 @@ extern bool try_update_bat_entry(struct ploop *ploop, u32 clu, extern int convert_bat_entries(u32 *bat_entries, u32 count); extern int ploop_add_delta(struct ploop *ploop, u32 level, struct file *file, bool 
is_raw); -extern void submit_pios(struct ploop *ploop, struct list_head *list); extern void dispatch_pios(struct ploop *ploop, struct pio *pio, struct list_head *pio_list); extern void do_ploop_work(struct work_struct *ws); extern void do_ploop_fsync_work(struct work_struct *ws); _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel