Re: [Qemu-block] [PATCH 1/7] qcow2: use one single memory block for the L2/refcount cache tables

2015-05-08 Thread Max Reitz

On 06.05.2015 15:39, Alberto Garcia wrote:

The qcow2 L2/refcount cache contains one separate table for each cache
entry. Doing one allocation per table adds unnecessary overhead and it
also requires us to store the address of each table separately.

Since the size of the cache is constant during its lifetime, it's
better to have an array that contains all the tables using one single
allocation.

In my tests measuring freshly created caches with sizes 128MB (L2) and
32MB (refcount) this uses around 10MB of RAM less.

Signed-off-by: Alberto Garcia be...@igalia.com
---
  block/qcow2-cache.c| 55 --
  block/qcow2-cluster.c  | 12 +--
  block/qcow2-refcount.c |  8 +---
  block/qcow2.h  |  3 ++-
  4 files changed, 39 insertions(+), 39 deletions(-)


Reviewed-by: Max Reitz mre...@redhat.com



[Qemu-block] [PATCH 1/7] qcow2: use one single memory block for the L2/refcount cache tables

2015-05-06 Thread Alberto Garcia
The qcow2 L2/refcount cache contains one separate table for each cache
entry. Doing one allocation per table adds unnecessary overhead and it
also requires us to store the address of each table separately.

Since the size of the cache is constant during its lifetime, it's
better to have an array that contains all the tables using one single
allocation.

In my tests measuring freshly created caches with sizes 128MB (L2) and
32MB (refcount) this uses around 10MB of RAM less.

Signed-off-by: Alberto Garcia be...@igalia.com
---
 block/qcow2-cache.c| 55 --
 block/qcow2-cluster.c  | 12 +--
 block/qcow2-refcount.c |  8 +---
 block/qcow2.h  |  3 ++-
 4 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index b115549..f0dfb69 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -28,7 +28,6 @@
 #include trace.h
 
 typedef struct Qcow2CachedTable {
-void*   table;
 int64_t offset;
 booldirty;
 int cache_hits;
@@ -40,39 +39,35 @@ struct Qcow2Cache {
 struct Qcow2Cache*  depends;
 int size;
 booldepends_on_flush;
+void   *table_array;
 };
 
+static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
+Qcow2Cache *c, int table)
+{
+BDRVQcowState *s = bs-opaque;
+return (uint8_t *) c-table_array + (size_t) table * s-cluster_size;
+}
+
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
 {
 BDRVQcowState *s = bs-opaque;
 Qcow2Cache *c;
-int i;
 
 c = g_new0(Qcow2Cache, 1);
 c-size = num_tables;
 c-entries = g_try_new0(Qcow2CachedTable, num_tables);
-if (!c-entries) {
-goto fail;
-}
-
-for (i = 0; i  c-size; i++) {
-c-entries[i].table = qemu_try_blockalign(bs-file, s-cluster_size);
-if (c-entries[i].table == NULL) {
-goto fail;
-}
+c-table_array = qemu_try_blockalign(bs-file,
+ (size_t) num_tables * 
s-cluster_size);
+
+if (!c-entries || !c-table_array) {
+qemu_vfree(c-table_array);
+g_free(c-entries);
+g_free(c);
+c = NULL;
 }
 
 return c;
-
-fail:
-if (c-entries) {
-for (i = 0; i  c-size; i++) {
-qemu_vfree(c-entries[i].table);
-}
-}
-g_free(c-entries);
-g_free(c);
-return NULL;
 }
 
 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
@@ -81,9 +76,9 @@ int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
 
 for (i = 0; i  c-size; i++) {
 assert(c-entries[i].ref == 0);
-qemu_vfree(c-entries[i].table);
 }
 
+qemu_vfree(c-table_array);
 g_free(c-entries);
 g_free(c);
 
@@ -151,8 +146,8 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, 
Qcow2Cache *c, int i)
 BLKDBG_EVENT(bs-file, BLKDBG_L2_UPDATE);
 }
 
-ret = bdrv_pwrite(bs-file, c-entries[i].offset, c-entries[i].table,
-s-cluster_size);
+ret = bdrv_pwrite(bs-file, c-entries[i].offset,
+  qcow2_cache_get_table_addr(bs, c, i), s-cluster_size);
 if (ret  0) {
 return ret;
 }
@@ -304,7 +299,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, 
Qcow2Cache *c,
 BLKDBG_EVENT(bs-file, BLKDBG_L2_LOAD);
 }
 
-ret = bdrv_pread(bs-file, offset, c-entries[i].table, 
s-cluster_size);
+ret = bdrv_pread(bs-file, offset, qcow2_cache_get_table_addr(bs, c, 
i),
+ s-cluster_size);
 if (ret  0) {
 return ret;
 }
@@ -319,7 +315,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, 
Qcow2Cache *c,
 found:
 c-entries[i].cache_hits++;
 c-entries[i].ref++;
-*table = c-entries[i].table;
+*table = qcow2_cache_get_table_addr(bs, c, i);
 
 trace_qcow2_cache_get_done(qemu_coroutine_self(),
c == s-l2_table_cache, i);
@@ -344,7 +340,7 @@ int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, 
void **table)
 int i;
 
 for (i = 0; i  c-size; i++) {
-if (c-entries[i].table == *table) {
+if (qcow2_cache_get_table_addr(bs, c, i) == *table) {
 goto found;
 }
 }
@@ -358,12 +354,13 @@ found:
 return 0;
 }
 
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
+void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
+ void *table)
 {
 int i;
 
 for (i = 0; i  c-size; i++) {
-if (c-entries[i].table == table) {
+if (qcow2_cache_get_table_addr(bs, c, i) == table) {
 goto found;
 }
 }
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index ed2b44d..5cd418a 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -263,7 +263,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, 
uint64_t