LGTM

Reviewed-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
(or signed-by?)

On 7/7/25 10:50, Pavel Tikhomirov wrote:
Recently we noticed that fstrim does not work properly on ploop images.
The problem is actually worse: current code preallocates excessive space
on the image and loses it later.

Example:
   [root@vz9-demens-1 fstrim]# du -h .
   4.0K  .
   [root@vz9-demens-1 fstrim]# ploop init -s 10G image
   ...
   [root@vz9-demens-1 fstrim]# du -h .
   513M  .
   [root@vz9-demens-1 fstrim]# ploop mount -m /mnt/fstrim/ DiskDescriptor.xml
   ...
   [root@vz9-demens-1 fstrim]# dd if=/dev/urandom of=/mnt/fstrim/file bs=1M 
count=1024 oflag=direct
   1024+0 records in
   1024+0 records out
   1073741824 bytes (1.1 GB, 1.0 GiB) copied, 4.93694 s, 217 MB/s
   [root@vz9-demens-1 fstrim]# ploop umount DiskDescriptor.xml
   ...
   [root@vz9-demens-1 fstrim]# du -h .
   8.7G  .
   [root@vz9-demens-1 fstrim]# ploop mount -m /mnt/fstrim/ DiskDescriptor.xml
   ...
   [root@vz9-demens-1 fstrim]# rm -rf /mnt/fstrim/file
   [root@vz9-demens-1 fstrim]# fstrim /mnt/fstrim
   [root@vz9-demens-1 fstrim]# ploop umount DiskDescriptor.xml
   ...
   [root@vz9-demens-1 fstrim]# du -h .
   7.7G  .

The image just after the creation was already 512M. After writing 1GB
data it was almost 9GB in size. The fstrim discarded only known blocks, so
final size is 7.7GB.

Mainly two factors contribute to that:
  - file_preallocated_area_start is never updated
  - parallel preallocation requests scale preallocation size by 128M
    each time

To fix this, the patch changes preallocation logic a bit:

- Make preallocations absolute instead of relative in
   ploop_req_prealloc(). Thus only request what is really needed.
- Rename s/ploop_pending_prealloc/ploop_no_pending_prealloc/ as it
   returns true in case of no pending preallocation.
- Make ploop_preallocate_cluster() return error on unexpected file size
   change. Also simplify the logic a little bit (e.g. more is excess).
- Preallocation in ploop_allocate_cluster() should not depend on (pos <
   prealloc_start), so always try to preallocate. Also simplify
   ploop_allocate_cluster() a little bit (e.g. old_size is excess).
- Update file_preallocated_area_start after allocation starts using
   preallocated space.
- Truncate preallocated space on ploop destruction.

Same example after the patch:

[root@ptikh-hci fstrim]# du -h .
4.0K    .
[root@ptikh-hci fstrim]# ploop init -s 10G image
[root@ptikh-hci fstrim]# du -h .
3.2M    .
[root@ptikh-hci fstrim]# ploop mount -m /mnt/fstrim/ DiskDescriptor.xml
...
[root@ptikh-hci fstrim]# dd if=/dev/urandom of=/mnt/fstrim/file bs=1M 
count=1024 oflag=direct
...
[root@ptikh-hci fstrim]# ploop umount DiskDescriptor.xml
...
[root@ptikh-hci fstrim]# du -h .
1.1G    .
[root@ptikh-hci fstrim]# ploop mount -m /mnt/fstrim/ DiskDescriptor.xml
...
[root@ptikh-hci fstrim]# rm -rf /mnt/fstrim/file
[root@ptikh-hci fstrim]# fstrim /mnt/fstrim
[root@ptikh-hci fstrim]# ploop umount DiskDescriptor.xml
...
[root@ptikh-hci fstrim]# du -h .
4.2M    .

Now it is much better. Empty image size is quite small, write does
not cause excessive preallocation and fstrim therefore reduces the
size.

https://virtuozzo.atlassian.net/browse/VSTOR-108868
Co-developed-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
---
v3: This is rework of v2 which makes it possible to apply it in
ready-kernel (no ploop structure changes). Also it covers some new
cases, like resetting the state on unexpected file size change. In v2
preemptive_prealloc was not reset and can probably leed to problems.
v4: fix compilation and incorporate v2 description
---
  drivers/md/dm-ploop-map.c    | 75 ++++++++++++++++++------------------
  drivers/md/dm-ploop-target.c |  6 +++
  2 files changed, 43 insertions(+), 38 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 11767dd91950..0c30f2cdd091 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -385,11 +385,16 @@ static void ploop_schedule_work(struct ploop *ploop)
        wake_up_process(ploop->kt_worker->task);
  }
-void ploop_req_prealloc(struct ploop *ploop, loff_t newlen)
+static void ploop_req_prealloc(struct ploop *ploop, loff_t size)
  {
+       struct ploop_delta *top = ploop_top_delta(ploop);
+
        lockdep_assert_held(&ploop->bat_lock);
- ploop->prealloc_size += newlen;
+       size = ALIGN(size + PREALLOC_SIZE / 2, PREALLOC_SIZE);
+       if (top->file_size + ploop->prealloc_in_progress + ploop->prealloc_size 
>= size)
+               return;
+       ploop->prealloc_size = size - top->file_size - 
ploop->prealloc_in_progress;
        wake_up_process(ploop->kt_allocator->task);
  }
@@ -1170,46 +1175,43 @@ ALLOW_ERROR_INJECTION(ploop_truncate_prealloc_safe, ERRNO);
  static int ploop_preallocate_cluster(struct ploop *ploop, struct file *file)
  {
        struct ploop_delta *top = ploop_top_delta(ploop);
-       loff_t end;
+       loff_t new_len;
        unsigned long flags;
-       int ret, more = 0;
+       int ret;
-prealloc_more:
        spin_lock_irqsave(&ploop->bat_lock, flags);
+prealloc_more:
        ploop->prealloc_in_progress = ploop->prealloc_size;
-       end = top->file_size + ploop->prealloc_in_progress;
-       loff_t new_len = ALIGN(end, ploop->prealloc_in_progress);
        ploop->prealloc_size = 0;
-       if (!ploop->prealloc_in_progress)
-               new_len = 0;
-       spin_unlock_irqrestore(&ploop->bat_lock, flags);
-       if (!new_len)
+       if (!ploop->prealloc_in_progress) {
+               spin_unlock_irqrestore(&ploop->bat_lock, flags);
                return 0;
+       }
+       new_len = top->file_size + ploop->prealloc_in_progress;
+       spin_unlock_irqrestore(&ploop->bat_lock, flags);
ret = ploop_truncate_prealloc_safe(ploop, top, top->file_size,
                                           new_len, file, __func__);
        if (ret) {
                PL_ERR("Failed to preallocate space: %d\n", ret);
+               spin_lock_irqsave(&ploop->bat_lock, flags);
                goto out;
        }
- /* here must be the only place to change file_size */
+       /*
+        * Here must be the only place to change file_size.
+        */
        spin_lock_irqsave(&ploop->bat_lock, flags);
        if (top->file_size < new_len) {
                top->file_size = new_len;
        } else {
                PL_ERR("unexpected file size change\n");
+               ret = -EIO;
+               goto out;
        }
        if (ploop->prealloc_size)
-               more = 1;
-       spin_unlock_irqrestore(&ploop->bat_lock, flags);
-       if (more) {
-               more = 0;
                goto prealloc_more;
-       }
-
  out:
-       spin_lock_irqsave(&ploop->bat_lock, flags);
        ploop->prealloc_in_progress = 0;
        ploop->prealloc_size = 0;
        spin_unlock_irqrestore(&ploop->bat_lock, flags);
@@ -1223,7 +1225,6 @@ ALLOW_ERROR_INJECTION(ploop_preallocate_cluster, ERRNO);
void ploop_should_prealloc(struct ploop *ploop, struct file *file)
  {
-       struct ploop_delta *top = ploop_top_delta(ploop);
        u32 dst_clu;
        u32 clu_size = CLU_SIZE(ploop);
        loff_t pos, end;
@@ -1237,13 +1238,12 @@ void ploop_should_prealloc(struct ploop *ploop, struct 
file *file)
pos = CLU_TO_POS(ploop, dst_clu);
        end = pos + clu_size;
-       if (end > top->file_preallocated_area_start - (PREALLOC_SIZE/2)) {
-               ploop_req_prealloc(ploop, PREALLOC_SIZE);
-       }
+
+       ploop_req_prealloc(ploop, end);
        spin_unlock_irqrestore(&ploop->bat_lock, flags);
  }
-static int ploop_pending_prealloc(struct ploop *ploop)
+static int ploop_no_pending_prealloc(struct ploop *ploop)
  {
        int ret;
        unsigned long flags;
@@ -1259,7 +1259,7 @@ static int ploop_allocate_cluster(struct ploop *ploop, 
u32 *dst_clu, struct file
  {
        struct ploop_delta *top = ploop_top_delta(ploop);
        u32 clu_size = CLU_SIZE(ploop);
-       loff_t off, pos, end, old_size;
+       loff_t off, pos, end;
        unsigned long flags;
        int ret;
        int retry_cnt = 0;
@@ -1278,21 +1278,15 @@ static int ploop_allocate_cluster(struct ploop *ploop, 
u32 *dst_clu, struct file
         */
        ploop_hole_clear_bit(*dst_clu, ploop);
- old_size = top->file_size;
        prealloc_start = top->file_preallocated_area_start;
        pos = CLU_TO_POS(ploop, *dst_clu);
        end = pos + clu_size;
-       off = min_t(loff_t, old_size, end);
+       off = min_t(loff_t, top->file_size, end);
+       ploop_req_prealloc(ploop, end);
+
        spin_unlock_irqrestore(&ploop->bat_lock, flags);
if (pos < prealloc_start) {
-               if (end + clu_size >
-                   top->file_preallocated_area_start - (PREALLOC_SIZE/2)) {
-                       spin_lock_irqsave(&ploop->bat_lock, flags);
-                       ploop_req_prealloc(ploop, PREALLOC_SIZE);
-                       spin_unlock_irqrestore(&ploop->bat_lock, flags);
-               }
-
                /* Clu at @pos may contain dirty data */
                if (!ploop->falloc_new_clu)
                        ret = ploop_punch_hole(file, pos, off - pos);
@@ -1316,13 +1310,16 @@ static int ploop_allocate_cluster(struct ploop *ploop, 
u32 *dst_clu, struct file
  retry_alloc:
        spin_lock_irqsave(&ploop->bat_lock, flags);
        /* size can change from parallel alloc */
-       old_size = top->file_size;
-       if (end > old_size) {
-               ploop_req_prealloc(ploop, PREALLOC_SIZE);
+       if (end > top->file_size) {
+               /*
+                * Reset preallocation, in case ploop_preallocate_cluster
+                * dropped it on error path.
+                */
+               ploop_req_prealloc(ploop, end);
                spin_unlock_irqrestore(&ploop->bat_lock, flags);
wait_event_interruptible(ploop->dispatcher_wq_prealloc,
-                                        ploop_pending_prealloc(ploop));
+                                        ploop_no_pending_prealloc(ploop));
spin_lock_irqsave(&ploop->bat_lock, flags);
                if (end > top->file_size) {
@@ -1339,6 +1336,8 @@ static int ploop_allocate_cluster(struct ploop *ploop, 
u32 *dst_clu, struct file
                }
        }
+ if (end > top->file_preallocated_area_start)
+               top->file_preallocated_area_start = end;
        spin_unlock_irqrestore(&ploop->bat_lock, flags);
return 0;
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index 8fa0043a7142..ea5f3a8dab6b 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -162,6 +162,7 @@ static bool ploop_empty_htable(struct hlist_head head[])
static void ploop_destroy(struct ploop *ploop)
  {
+       struct ploop_delta *top = ploop_top_delta(ploop);
        int i;
if (ploop->kt_worker) {
@@ -201,6 +202,11 @@ static void ploop_destroy(struct ploop *ploop)
for (i = 0; i < 2; i++)
                percpu_ref_exit(&ploop->inflight_bios_ref[i]);
+
+       if (top->file_preallocated_area_start < top->file_size &&
+           vfs_truncate2(&top->file->f_path, top->file_preallocated_area_start, 
top->file))
+               PL_ERR("Unable to truncate preallocated area on destroy");
+
        /* Nobody uses it after destroy_workqueue() */
        while (ploop->nr_deltas-- > 0) {
                if (ploop->deltas[ploop->nr_deltas].file) {

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to