Re: [PATCH v2 09/11] qcow2: make subclusters discardable

2024-05-21 Thread Alexander Ivanov




On 5/13/24 08:32, Andrey Drobyshev wrote:

This commit makes the discard operation work on the subcluster level
rather than cluster level.  It introduces discard_l2_subclusters()
function and makes use of it in qcow2 discard implementation, much like
it's done with zero_in_l2_slice() / zero_l2_subclusters().  It also
changes the qcow2 driver pdiscard_alignment to subcluster_size.  That
way subcluster-aligned discards lead to actual fallocate(PUNCH_HOLE)
operation and free host disk space.

This feature will let us gain additional disk space on guest
TRIM/discard requests, especially when using large enough clusters
(1M, 2M) with subclusters enabled.

Also rename qcow2_cluster_discard() -> qcow2_subcluster_discard() to
reflect the change.

Signed-off-by: Andrey Drobyshev 
---
  block/qcow2-cluster.c  | 106 +
  block/qcow2-snapshot.c |   6 +--
  block/qcow2.c  |  25 +-
  block/qcow2.h  |   4 +-
  tests/qemu-iotests/271 |   2 +-
  5 files changed, 117 insertions(+), 26 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 8d39e2f960..3c134a7e80 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -2105,25 +2105,106 @@ discard_in_l2_slice(BlockDriverState *bs, uint64_t 
offset, uint64_t nb_clusters,
  return nb_clusters;
  }
  
-int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,

-  uint64_t bytes, enum qcow2_discard_type type,
-  bool full_discard)
+static int coroutine_fn GRAPH_RDLOCK
+discard_l2_subclusters(BlockDriverState *bs, uint64_t offset,
+   uint64_t nb_subclusters,
+   enum qcow2_discard_type type,
+   bool full_discard)
+{
+BDRVQcow2State *s = bs->opaque;
+uint64_t new_l2_bitmap, bitmap_alloc_mask, bitmap_zero_mask;
+int ret, sc = offset_to_sc_index(s, offset);
+g_auto(SubClusterRangeInfo) scri = { 0 };
+
+ret = get_sc_range_info(bs, offset, nb_subclusters, );
+if (ret < 0) {
+return ret;
+}
+
+new_l2_bitmap = scri.l2_bitmap;
+bitmap_alloc_mask = QCOW_OFLAG_SUB_ALLOC_RANGE(sc, sc + nb_subclusters);
+bitmap_zero_mask = QCOW_OFLAG_SUB_ZERO_RANGE(sc, sc + nb_subclusters);
+
+new_l2_bitmap &= ~bitmap_alloc_mask;
+
+/*
+ * Full discard means we fall through to the backing file, thus we need
+ * to mark the subclusters as deallocated and clear the corresponding
+ * zero bits.
+ *
+ * Non-full discard means subclusters should be explicitly marked as
+ * zeroes.  In this case QCOW2 specification requires the corresponding
+ * allocation status bits to be unset as well.  If the subclusters are
+ * deallocated in the first place and there's no backing, the operation
+ * can be skipped.
+ */
+if (full_discard) {
+new_l2_bitmap &= ~bitmap_zero_mask;
+} else if (bs->backing || scri.l2_bitmap & bitmap_alloc_mask) {
+new_l2_bitmap |= bitmap_zero_mask;
+}
+
+/*
+ * If after discarding this range there won't be any allocated subclusters
+ * left, and new bitmap becomes the same as it'd be after discarding the
+ * whole cluster, we better discard it entirely.  That way we'd also
+ * update the refcount table.
+ */
+if ((full_discard && new_l2_bitmap == 0) ||
+(!full_discard && new_l2_bitmap == QCOW_L2_BITMAP_ALL_ZEROES)) {
+return discard_in_l2_slice(
+bs, QEMU_ALIGN_DOWN(offset, s->cluster_size),
+1, type, full_discard);
+}
+
+if (scri.l2_bitmap != new_l2_bitmap) {
+set_l2_bitmap(s, scri.l2_slice, scri.l2_index, new_l2_bitmap);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, scri.l2_slice);
+}
+
+discard_no_unref_any_file(
+bs, (scri.l2_entry & L2E_OFFSET_MASK) + offset_into_cluster(s, offset),
+nb_subclusters * s->subcluster_size, scri.ctype, type);
+
+return 0;
+}
+
+int qcow2_subcluster_discard(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, enum qcow2_discard_type type,
+ bool full_discard)
  {
  BDRVQcow2State *s = bs->opaque;
  uint64_t end_offset = offset + bytes;
  uint64_t nb_clusters;
+unsigned head, tail;
  int64_t cleared;
  int ret;
  
  /* Caller must pass aligned values, except at image end */

-assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
-assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
+assert(QEMU_IS_ALIGNED(offset, s->subcluster_size));
+assert(QEMU_IS_ALIGNED(end_offset, s->subcluster_size) ||
 end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
  
-nb_clusters = size_to_clusters(s, bytes);

+head = MIN(end_offset, ROUND_UP(offset, s->cluster_size)) - offset;
+offset += head;
+
+tail = (end_offset >= bs->total_sectors << BDRV_SECTOR_BITS) ? 0 :
+   end_offset - 

[PATCH v2 09/11] qcow2: make subclusters discardable

2024-05-13 Thread Andrey Drobyshev
This commit makes the discard operation work on the subcluster level
rather than cluster level.  It introduces discard_l2_subclusters()
function and makes use of it in qcow2 discard implementation, much like
it's done with zero_in_l2_slice() / zero_l2_subclusters().  It also
changes the qcow2 driver pdiscard_alignment to subcluster_size.  That
way subcluster-aligned discards lead to actual fallocate(PUNCH_HOLE)
operation and free host disk space.

This feature will let us gain additional disk space on guest
TRIM/discard requests, especially when using large enough clusters
(1M, 2M) with subclusters enabled.

Also rename qcow2_cluster_discard() -> qcow2_subcluster_discard() to
reflect the change.

Signed-off-by: Andrey Drobyshev 
---
 block/qcow2-cluster.c  | 106 +
 block/qcow2-snapshot.c |   6 +--
 block/qcow2.c  |  25 +-
 block/qcow2.h  |   4 +-
 tests/qemu-iotests/271 |   2 +-
 5 files changed, 117 insertions(+), 26 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 8d39e2f960..3c134a7e80 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -2105,25 +2105,106 @@ discard_in_l2_slice(BlockDriverState *bs, uint64_t 
offset, uint64_t nb_clusters,
 return nb_clusters;
 }
 
-int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
-  uint64_t bytes, enum qcow2_discard_type type,
-  bool full_discard)
+static int coroutine_fn GRAPH_RDLOCK
+discard_l2_subclusters(BlockDriverState *bs, uint64_t offset,
+   uint64_t nb_subclusters,
+   enum qcow2_discard_type type,
+   bool full_discard)
+{
+BDRVQcow2State *s = bs->opaque;
+uint64_t new_l2_bitmap, bitmap_alloc_mask, bitmap_zero_mask;
+int ret, sc = offset_to_sc_index(s, offset);
+g_auto(SubClusterRangeInfo) scri = { 0 };
+
+ret = get_sc_range_info(bs, offset, nb_subclusters, );
+if (ret < 0) {
+return ret;
+}
+
+new_l2_bitmap = scri.l2_bitmap;
+bitmap_alloc_mask = QCOW_OFLAG_SUB_ALLOC_RANGE(sc, sc + nb_subclusters);
+bitmap_zero_mask = QCOW_OFLAG_SUB_ZERO_RANGE(sc, sc + nb_subclusters);
+
+new_l2_bitmap &= ~bitmap_alloc_mask;
+
+/*
+ * Full discard means we fall through to the backing file, thus we need
+ * to mark the subclusters as deallocated and clear the corresponding
+ * zero bits.
+ *
+ * Non-full discard means subclusters should be explicitly marked as
+ * zeroes.  In this case QCOW2 specification requires the corresponding
+ * allocation status bits to be unset as well.  If the subclusters are
+ * deallocated in the first place and there's no backing, the operation
+ * can be skipped.
+ */
+if (full_discard) {
+new_l2_bitmap &= ~bitmap_zero_mask;
+} else if (bs->backing || scri.l2_bitmap & bitmap_alloc_mask) {
+new_l2_bitmap |= bitmap_zero_mask;
+}
+
+/*
+ * If after discarding this range there won't be any allocated subclusters
+ * left, and new bitmap becomes the same as it'd be after discarding the
+ * whole cluster, we better discard it entirely.  That way we'd also
+ * update the refcount table.
+ */
+if ((full_discard && new_l2_bitmap == 0) ||
+(!full_discard && new_l2_bitmap == QCOW_L2_BITMAP_ALL_ZEROES)) {
+return discard_in_l2_slice(
+bs, QEMU_ALIGN_DOWN(offset, s->cluster_size),
+1, type, full_discard);
+}
+
+if (scri.l2_bitmap != new_l2_bitmap) {
+set_l2_bitmap(s, scri.l2_slice, scri.l2_index, new_l2_bitmap);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, scri.l2_slice);
+}
+
+discard_no_unref_any_file(
+bs, (scri.l2_entry & L2E_OFFSET_MASK) + offset_into_cluster(s, offset),
+nb_subclusters * s->subcluster_size, scri.ctype, type);
+
+return 0;
+}
+
+int qcow2_subcluster_discard(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, enum qcow2_discard_type type,
+ bool full_discard)
 {
 BDRVQcow2State *s = bs->opaque;
 uint64_t end_offset = offset + bytes;
 uint64_t nb_clusters;
+unsigned head, tail;
 int64_t cleared;
 int ret;
 
 /* Caller must pass aligned values, except at image end */
-assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
-assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
+assert(QEMU_IS_ALIGNED(offset, s->subcluster_size));
+assert(QEMU_IS_ALIGNED(end_offset, s->subcluster_size) ||
end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
 
-nb_clusters = size_to_clusters(s, bytes);
+head = MIN(end_offset, ROUND_UP(offset, s->cluster_size)) - offset;
+offset += head;
+
+tail = (end_offset >= bs->total_sectors << BDRV_SECTOR_BITS) ? 0 :
+   end_offset - MAX(offset, start_of_cluster(s, end_offset));
+end_offset -= tail;