I dug a little deeper and found a problem; see inline.

On 10/24/25 17:33, Vasileios Almpanis wrote:
When handling multiple concurrent dm-ploop requests, large bio_vec arrays
can be allocated during request processing. These allocations are currently
done with kmalloc_array(GFP_ATOMIC), which can fail under memory pressure
for higher orders (order >= 6, ~256KB). Such failures result in partial or
corrupted I/O, leading to EXT4 directory checksum errors and read-only
remounts under heavy parallel workloads.

This patch adds a fallback mechanism: when the kmalloc_array() allocation
fails, the allocation is retried with kvmalloc_array() from a blocking
context. This avoids relying on high-order GFP_ATOMIC allocations from
interrupt context and makes the allocation behavior more reliable.


nit: bug link missing

https://virtuozzo.atlassian.net/browse/VSTOR-109595

Signed-off-by: Vasileios Almpanis <[email protected]>
Acked-by: Denis V. Lunev <[email protected]>

Feature: dm-ploop: ploop target driver
---
  drivers/md/dm-ploop-map.c | 30 +++++++++++++++++++++---------
  1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 3fb841f8bcea..ad9806880213 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -194,7 +194,7 @@ static void ploop_prq_endio(struct pio *pio, void *prq_ptr,
        struct request *rq = prq->rq;
if (prq->bvec)
-               kfree(prq->bvec);
+               kvfree(prq->bvec);
        if (prq->css)
                css_put(prq->css);
        /*
@@ -1963,7 +1963,7 @@ void ploop_index_wb_submit(struct ploop *ploop, struct 
ploop_index_wb *piwb)
        ploop_runners_add_work(ploop, pio);
  }
-static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq)
+static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq, gfp_t 
flags)
  {
        struct bio_vec bv, *bvec, *tmp;
        struct req_iterator rq_iter;
@@ -1972,8 +1972,10 @@ static struct bio_vec *ploop_create_bvec_from_rq(struct 
request *rq)
        rq_for_each_bvec(bv, rq, rq_iter)
                nr_bvec++;
- bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
-                            GFP_ATOMIC);
+       if (gfpflags_allow_blocking(flags))
+               bvec = kvmalloc_array(nr_bvec, sizeof(struct bio_vec), flags);
+       else
+               bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), flags);
        if (!bvec)
                goto out;
@@ -1989,7 +1991,8 @@ ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL); static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
                                           struct pio *pio,
-                                          struct llist_head *lldeferred_pios)
+                                          struct llist_head *lldeferred_pios,
+                                          gfp_t flags)
  {
        struct ploop_rq *prq = pio->endio_cb_data;
        struct request *rq = prq->rq;
@@ -2003,9 +2006,18 @@ static void ploop_prepare_one_embedded_pio(struct ploop 
*ploop,
                 * Transform a set of bvec arrays related to bios
                 * into a single bvec array (which we can iterate).
                 */
-               bvec = ploop_create_bvec_from_rq(rq);
-               if (!bvec)
+               bvec = ploop_create_bvec_from_rq(rq, flags);
+               if (!bvec) {
+                       /*
+                        * If allocation in atomic context fails defer
+                        * it to blocking context.
+                        */
+                       if (!gfpflags_allow_blocking(flags)) {
+                               llist_add((struct llist_node *)(&pio->list), 
&ploop->pios[PLOOP_LIST_PREPARE]);
+                               return;
+                       }
                        goto err_nomem;
+               }
                prq->bvec = bvec;
  skip_bvec:
                pio->bi_iter.bi_size = blk_rq_bytes(rq);
@@ -2044,7 +2056,7 @@ static void ploop_prepare_embedded_pios(struct ploop 
*ploop,
                pio = list_entry((struct list_head *)pos, typeof(*pio), list);
                INIT_LIST_HEAD(&pio->list); /* until type is changed */
                if (pio->queue_list_id != PLOOP_LIST_FLUSH)
-                       ploop_prepare_one_embedded_pio(ploop, pio, 
deferred_pios);
+                       ploop_prepare_one_embedded_pio(ploop, pio, 
deferred_pios, GFP_NOIO);
                else
                        llist_add((struct llist_node *)(&pio->list),
                                  &ploop->pios[PLOOP_LIST_FLUSH]);
@@ -2615,7 +2627,7 @@ static void ploop_submit_embedded_pio(struct ploop 
*ploop, struct pio *pio)
                goto out;
        }
- ploop_prepare_one_embedded_pio(ploop, pio, &deferred_pios);
+       ploop_prepare_one_embedded_pio(ploop, pio, &deferred_pios, GFP_ATOMIC | 
__GFP_NOWARN);

Here only the ploop_clone_and_map -> ploop_submit_embedded_pio path requires GFP_ATOMIC; the ploop_submit_embedded_pios -> ploop_submit_embedded_pio path should not use GFP_ATOMIC. We need to propagate the flags from the caller to fix that.

        /*
         * Disable fast path due to rcu lockups fs -> ploop -> fs - fses are 
not reentrant
         * we can however try another fast path skip dispatcher thread and pass 
directly to

--
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to