When handling multiple concurrent dm-ploop requests, large bio_vec arrays can be allocated during request processing. These allocations are currently done with kmalloc_array(GFP_ATOMIC), which can fail under memory pressure for high-order allocations (order >= 6, i.e. 256KB and above with 4KB pages). Such failures result in partial or corrupted I/O, leading to EXT4 directory checksum errors and read-only remounts under heavy parallel workloads.
This patch adds a fallback mechanism to use kvmalloc_array for large or failed allocations. If the estimated allocation order is >= 6, or if the kmalloc_array allocation fails, the pio is marked with use_kvmalloc and requeued on the PLOOP_LIST_PREPARE list, so that the allocation is retried with kvmalloc_array(GFP_NOIO). This avoids high-order GFP_ATOMIC allocations from interrupt context and ensures more reliable memory allocation behavior. https://virtuozzo.atlassian.net/browse/VSTOR-109595 Signed-off-by: Vasileios Almpanis <[email protected]> Feature: dm-ploop: ploop target driver --- drivers/md/dm-ploop-map.c | 46 ++++++++++++++++++++++++++++++--------- drivers/md/dm-ploop.h | 1 + 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c index 3fb841f8bcea..899b9bf088b3 100644 --- a/drivers/md/dm-ploop-map.c +++ b/drivers/md/dm-ploop-map.c @@ -16,6 +16,7 @@ #include <linux/error-injection.h> #include <linux/uio.h> #include <linux/blk-mq.h> +#include <linux/mm.h> #include <uapi/linux/falloc.h> #include "dm-ploop.h" #include "dm-rq.h" @@ -89,6 +90,7 @@ void ploop_init_pio(struct ploop *ploop, unsigned int bi_op, struct pio *pio) pio->ref_index = PLOOP_REF_INDEX_INVALID; pio->queue_list_id = PLOOP_LIST_DEFERRED; pio->bi_status = BLK_STS_OK; + pio->use_kvmalloc = false; atomic_set(&pio->remaining, 1); pio->piwb = NULL; INIT_LIST_HEAD(&pio->list); @@ -193,8 +195,12 @@ static void ploop_prq_endio(struct pio *pio, void *prq_ptr, struct ploop_rq *prq = prq_ptr; struct request *rq = prq->rq; - if (prq->bvec) - kfree(prq->bvec); + if (prq->bvec) { + if (pio->use_kvmalloc) + kvfree(prq->bvec); + else + kfree(prq->bvec); + } if (prq->css) css_put(prq->css); /* @@ -1963,26 +1969,40 @@ void ploop_index_wb_submit(struct ploop *ploop, struct ploop_index_wb *piwb) ploop_runners_add_work(ploop, pio); } -static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq) +static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq, bool use_kvmalloc) { struct bio_vec bv, *bvec, *tmp; struct req_iterator rq_iter; unsigned int nr_bvec = 0; + unsigned int 
order = 0; rq_for_each_bvec(bv, rq, rq_iter) nr_bvec++; - bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), - GFP_ATOMIC); - if (!bvec) - goto out; + if (use_kvmalloc) { + bvec = kvmalloc_array(nr_bvec, sizeof(struct bio_vec), + GFP_NOIO); + if (!bvec) + return ERR_PTR(-ENOMEM); + } else { + order = get_order(nr_bvec * sizeof(struct bio_vec)); + /* + * order 6 is 262144 bytes. Lets defer such big + * allocations to workqueue. + */ + if (order >= 6) + return ERR_PTR(-EAGAIN); + bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), + GFP_ATOMIC | __GFP_NOWARN); + if (!bvec) + return ERR_PTR(-EAGAIN); + } tmp = bvec; rq_for_each_bvec(bv, rq, rq_iter) { *tmp = bv; tmp++; } -out: return bvec; } ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL); @@ -2003,9 +2023,15 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop, * Transform a set of bvec arrays related to bios * into a single bvec array (which we can iterate). */ - bvec = ploop_create_bvec_from_rq(rq); - if (!bvec) + bvec = ploop_create_bvec_from_rq(rq, pio->use_kvmalloc); + if (IS_ERR(bvec)) { + if (PTR_ERR(bvec) == -EAGAIN) { + pio->use_kvmalloc = true; + llist_add((struct llist_node *)(&pio->list), &ploop->pios[PLOOP_LIST_PREPARE]); + return; + } goto err_nomem; + } prq->bvec = bvec; skip_bvec: pio->bi_iter.bi_size = blk_rq_bytes(rq); diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h index fc12efeb0cd9..53e8d12064bd 100644 --- a/drivers/md/dm-ploop.h +++ b/drivers/md/dm-ploop.h @@ -316,6 +316,7 @@ struct pio { unsigned int ref_index:2; u8 queue_list_id; /* id in ploop->pios */ + bool use_kvmalloc; struct ploop_index_wb *piwb; -- 2.43.0 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
