Hello! Beware, it used GFP_ATOMIC. Does not this mean t his code can be executed in interrupt context? If so, then kvmalloc is a strict no.
On Wed, Oct 22, 2025 at 8:11 PM Vasileios Almpanis <[email protected]> wrote: > > When handling multiple concurrent dm-ploop requests, large bio_vec arrays > can be allocated during request processing. These allocations are currently > done with kmalloc_array(GFP_ATOMIC), which can fail under memory pressure > for higher orders (order >= 6, ~256KB). Such failures result in partial or > corrupted I/O, leading to EXT4 directory checksum errors and read-only > remounts under heavy parallel workloads. > > This patch adds a fallback mechanism to use kvmalloc_array for > large or failed allocations. If the estimated allocation order is >= 6, or > if the kmalloc_array allocation fails. This avoids high-order GFP_ATOMIC > allocations from interrupt context and ensures more reliable memory allocation > behavior. > > https://virtuozzo.atlassian.net/browse/VSTOR-109595 > Signed-off-by: Vasileios Almpanis <[email protected]> > Feature: dm-ploop: ploop target driver > --- > drivers/md/dm-ploop-map.c | 46 ++++++++++++++++++++++++++++++--------- > drivers/md/dm-ploop.h | 1 + > 2 files changed, 37 insertions(+), 10 deletions(-) > > diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c > index 3fb841f8bcea..899b9bf088b3 100644 > --- a/drivers/md/dm-ploop-map.c > +++ b/drivers/md/dm-ploop-map.c > @@ -16,6 +16,7 @@ > #include <linux/error-injection.h> > #include <linux/uio.h> > #include <linux/blk-mq.h> > +#include <linux/mm.h> > #include <uapi/linux/falloc.h> > #include "dm-ploop.h" > #include "dm-rq.h" > @@ -89,6 +90,7 @@ void ploop_init_pio(struct ploop *ploop, unsigned int > bi_op, struct pio *pio) > pio->ref_index = PLOOP_REF_INDEX_INVALID; > pio->queue_list_id = PLOOP_LIST_DEFERRED; > pio->bi_status = BLK_STS_OK; > + pio->use_kvmalloc = false; > atomic_set(&pio->remaining, 1); > pio->piwb = NULL; > INIT_LIST_HEAD(&pio->list); > @@ -193,8 +195,12 @@ static void ploop_prq_endio(struct pio *pio, void > *prq_ptr, > struct ploop_rq *prq = prq_ptr; > struct request *rq = prq->rq; > > - if (prq->bvec) > - kfree(prq->bvec); > + if (prq->bvec) { > + if (pio->use_kvmalloc) > + kvfree(prq->bvec); > + else > + kfree(prq->bvec); > + } > if (prq->css) > css_put(prq->css); > /* > @@ -1963,26 +1969,40 @@ void ploop_index_wb_submit(struct ploop *ploop, > struct ploop_index_wb *piwb) > ploop_runners_add_work(ploop, pio); > } > > -static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq) > +static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq, bool > use_kvmalloc) > { > struct bio_vec bv, *bvec, *tmp; > struct req_iterator rq_iter; > unsigned int nr_bvec = 0; > + unsigned int order = 0; > > rq_for_each_bvec(bv, rq, rq_iter) > nr_bvec++; > > - bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), > - GFP_ATOMIC); > - if (!bvec) > - goto out; > + if (use_kvmalloc) { > + bvec = kvmalloc_array(nr_bvec, sizeof(struct bio_vec), > + GFP_NOIO); > + if (!bvec) > + return ERR_PTR(-ENOMEM); > + } else { > + order = get_order(nr_bvec * sizeof(struct bio_vec)); > + /* > + * order 6 is 262144 bytes. Lets defer such big > + * allocations to workqueue. > + */ > + if (order >= 6) > + return ERR_PTR(-EAGAIN); > + bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), > + GFP_ATOMIC | __GFP_NOWARN); > + if (!bvec) > + return ERR_PTR(-EAGAIN); > + } > > tmp = bvec; > rq_for_each_bvec(bv, rq, rq_iter) { > *tmp = bv; > tmp++; > } > -out: > return bvec; > } > ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL); > @@ -2003,9 +2023,15 @@ static void ploop_prepare_one_embedded_pio(struct > ploop *ploop, > * Transform a set of bvec arrays related to bios > * into a single bvec array (which we can iterate). > */ > - bvec = ploop_create_bvec_from_rq(rq); > - if (!bvec) > + bvec = ploop_create_bvec_from_rq(rq, pio->use_kvmalloc); > + if (IS_ERR(bvec)) { > + if (PTR_ERR(bvec) == -EAGAIN) { > + pio->use_kvmalloc = true; > + llist_add((struct llist_node *)(&pio->list), > &ploop->pios[PLOOP_LIST_PREPARE]); > + return; > + } > goto err_nomem; > + } > prq->bvec = bvec; > skip_bvec: > pio->bi_iter.bi_size = blk_rq_bytes(rq); > diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h > index fc12efeb0cd9..53e8d12064bd 100644 > --- a/drivers/md/dm-ploop.h > +++ b/drivers/md/dm-ploop.h > @@ -316,6 +316,7 @@ struct pio { > unsigned int ref_index:2; > > u8 queue_list_id; /* id in ploop->pios */ > + bool use_kvmalloc; > > struct ploop_index_wb *piwb; > > -- > 2.43.0 > > _______________________________________________ > Devel mailing list > [email protected] > https://lists.openvz.org/mailman/listinfo/devel _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
