The hash store allow to do lookup in the set of previously frozen log stores.
Each frozen log store is called an incarnation. When the number of incarnation
reach a user defined setting oldest incarnations are dropped to avoid that the
dedup metadata grow without limit.

Signed-off-by: Benoit Canet <ben...@irqsave.net>
---
 block/Makefile.objs      |    2 +-
 block/qcow2-hash-store.c |  802 ++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2-log-store.c  |  242 ++++++++++++++
 block/qcow2.h            |   38 +++
 4 files changed, 1083 insertions(+), 1 deletion(-)
 create mode 100644 block/qcow2-hash-store.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 8c0b646..cafd4f8 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,6 +1,6 @@
 block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o 
vvfat.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
-block-obj-y += qcow2-journal.o qcow2-log-store.o
+block-obj-y += qcow2-journal.o qcow2-log-store.o qcow2-hash-store.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += vhdx.o
diff --git a/block/qcow2-hash-store.c b/block/qcow2-hash-store.c
new file mode 100644
index 0000000..5284740
--- /dev/null
+++ b/block/qcow2-hash-store.c
@@ -0,0 +1,802 @@
+/*
+ * QCOW2 hash store
+ *
+ * Copyright (C) Nodalink, SARL. 2013
+ *
+ * Author:
+ *   Benoît Canet <benoit.ca...@irqsave.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+/* This function compute the number of bytes required to store a tag in hash
+ * store filters
+ *
+ * This function will only return values in the set [2, 4, 8] matching
+ * convenient compiler type sizes.
+ *
+ * @order: order of the hash store
+ * @ret:   the number of byte required to store a tag
+ */
+int qcow2_hash_store_get_tag_size(uint32_t order)
+{
+    if (order < 16) {
+        return 2;
+    }
+
+    if (order < 32) {
+        return 4;
+    }
+
+    return 8;
+}
+
+static uint64_t qcow2_hash_store_round_to_cluster(uint64_t size)
+{
+    return ((size / HASH_STORE_CLUSTER_SIZE) + 1) * HASH_STORE_CLUSTER_SIZE;
+}
+
+/* This function compute the size of an in ram filter
+ *
+ * @order: the order of the store
+ * @ret:   the size of the in ram filter in bytes
+ */
+uint64_t qcow2_hash_store_filter_size(uint32_t order)
+{
+    uint64_t size = qcow2_log_store_nb_entries(order) *
+                    qcow2_hash_store_get_tag_size(order);
+    return qcow2_hash_store_round_to_cluster(size);
+}
+
+/* this function make sure that no buckets are splitted between two clusters */
+static uint64_t qcow2_hash_store_round_to_bucket(uint64_t count)
+{
+    return (count / QCOW_LOG_STORE_BUCKET_SIZE) * QCOW_LOG_STORE_BUCKET_SIZE;
+}
+
+uint64_t qcow2_hash_store_nb_hash_info_per_cluster(void)
+{
+    uint64_t raw_nb = HASH_STORE_CLUSTER_SIZE / sizeof(QCowHashInfo);
+    return qcow2_hash_store_round_to_bucket(raw_nb);
+}
+
+/* This function compute the size of a frozen hash table that is dumped to disk
+ *
+ * @order: the order of the store
+ * @ret:   the size in bytes
+ */
+static uint64_t qcow2_hash_store_table_size(uint32_t order)
+{
+    uint32_t nb_per_cluster = qcow2_hash_store_nb_hash_info_per_cluster();
+    uint64_t count = (qcow2_log_store_nb_entries(order) / nb_per_cluster) + 1;
+    return count * HASH_STORE_CLUSTER_SIZE;
+}
+
+/* This function compute the size of the result of a log store incarnation
+ *
+ * @order: the order of the store
+ * @ret:   the size in bytes
+ */
+uint64_t qcow2_hash_store_incarnation_size(uint32_t order)
+{
+    return qcow2_hash_store_filter_size(order) +
+           qcow2_hash_store_table_size(order);
+}
+
+/* This function reset a hash store
+ *
+ * @store:           the hash store to initialize
+ * @order:           the bit order of the hash store
+ * @nb_incarnations: the number of incarnation in the hash store
+ * @nb_in_limbo:     the number of in limbo incarnation disk space to recycle
+ */
+void qcow2_hash_store_reset(QCowHashStore *store,
+                            uint32_t order,
+                            uint32_t nb_incarnations,
+                            uint32_t nb_in_limbo)
+{
+    store->order = order;
+    store->nb_incarnations = nb_incarnations;
+    store->nb_in_limbo = nb_in_limbo;
+}
+
+/* This function initialize a hash store
+ *
+ * @store:           the hash store to initialize
+ */
+void qcow2_hash_store_init(QCowHashStore *store, uint32_t order)
+{
+    store->order = order;
+    QTAILQ_INIT(&store->incarnations);
+    QTAILQ_INIT(&store->in_limbo);
+}
+
+/* This function cleanup a hash store : it free incarnation and limboes
+ *
+ * @store: the store to cleanup
+ */
+void qcow2_hash_store_cleanup(QCowHashStore *store)
+{
+    QCowIncarnation *incarnation, *next_incarnation;
+    QCowLimbo *limbo, *next_limbo;
+
+    /* destroy each incarnation structure */
+    QTAILQ_FOREACH_SAFE(incarnation,
+                        &store->incarnations,
+                        next, next_incarnation) {
+        QTAILQ_REMOVE(&store->incarnations, incarnation, next);
+        qcow2_hash_store_incarnation_free(incarnation);
+    }
+
+    /* destroy each limbo offset */
+    QTAILQ_FOREACH_SAFE(limbo, &store->in_limbo, next, next_limbo) {
+        QTAILQ_REMOVE(&store->in_limbo, limbo, next);
+        g_free(limbo);
+    }
+}
+
+/* this function read a tag from an in ram filter
+ *
+ * For explanation on purpose see qcow2_log_store_write_tag_to_filter
+ *
+ * @order:  the order of the store
+ * @index:  the index in the in ram filter
+ * @filter: the in ram filter
+ * @ret:    the tag stored at index in the in ram filter
+ */
+static uint64_t qcow2_hash_store_read_tag_from_filter(uint32_t order,
+                                                      uint64_t index,
+                                                      uint8_t *filter)
+{
+    uint16_t *filter_16 = (uint16_t *) filter;
+    uint32_t *filter_32 = (uint32_t *) filter;
+    uint64_t *filter_64 = (uint64_t *) filter;
+
+    switch (qcow2_hash_store_get_tag_size(order)) {
+    case 2:
+        return be16_to_cpu(filter_16[index]);
+    case 4:
+        return be32_to_cpu(filter_32[index]);
+    case 8:
+        return be64_to_cpu(filter_64[index]);
+    default:
+        /* should never pass here given qcow2_hash_store_get_tag_size results 
*/
+        abort();
+    }
+    return 0;
+}
+
+static bool qcow2_hash_store_max_incarnations_reached(BlockDriverState *bs,
+                                                      QCowHashStore *store)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    /* max never reached if limit is zero */
+    if (!s->dedup_max_incarnations) {
+        return false;
+    }
+
+    return store->nb_incarnations >= s->dedup_max_incarnations;
+}
+
+static QCowLimbo * qcow2_hash_store_limbo_new(uint64_t offset)
+{
+    QCowLimbo *result = g_new0(QCowLimbo, 1);
+    result->offset = offset;
+    return result;
+}
+
+static QCowIncarnation *qcow2_hash_store_incarnation_new(uint8_t *filter,
+                                                         uint64_t offset,
+                                                         uint32_t order)
+{
+    QCowIncarnation *incarnation;
+
+    incarnation = g_new0(QCowIncarnation, 1);
+    incarnation->filter = filter;
+    incarnation->filter_offset = offset;
+    incarnation->hash_table_offset = offset +
+                                     qcow2_hash_store_filter_size(order);
+    incarnation->size = qcow2_hash_store_incarnation_size(order);
+
+    return incarnation;
+}
+
+void qcow2_hash_store_incarnation_free(QCowIncarnation *incarnation)
+{
+    qemu_vfree(incarnation->filter);
+    free(incarnation);
+}
+
+/* This function allocate the disk space needed for an incarnation
+ *
+ * @order: the bit order of the incarnation size
+ * @ret:   offset of allocated disk space on succes, -errno on error
+ */
+static int64_t qcow2_hash_store_allocate_incarnation_space(BlockDriverState 
*bs,
+                                                           uint32_t order)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t size = qcow2_hash_store_incarnation_size(order);
+    int64_t offset;
+    int ret = 0;
+
+    /* allocate from disk the combined size of the filter and the hash table */
+    offset = qcow2_alloc_clusters(bs, size);
+
+    if (offset < 0) {
+        return offset;
+    }
+
+    /* flush refcounts so we don't loose allocated space on crash */
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+
+    if (ret < 0) {
+        qcow2_free_clusters(bs, offset, size);
+        return ret;
+    }
+
+    return offset;
+}
+
+/* This function get the on disk offset needed for storing an incarnation
+ *
+ * It will try to recycle in limbo incarnation space if there is any
+ * or will allocate space from disk
+ *
+ * @store: the QCowHashStore to work on
+ * @ret:   disk offset on succes, -errno on error
+ */
+int64_t qcow2_hash_store_get_incarnation_offset(BlockDriverState *bs,
+                                                QCowHashStore *store)
+{
+    QCowLimbo *limbo;
+    int64_t offset;
+
+    /* no previously allocated space in limbo -> allocate it */
+    if (QTAILQ_EMPTY(&store->in_limbo)) {
+        return qcow2_hash_store_allocate_incarnation_space(bs, store->order);
+    }
+
+    /* take oldest in limbo allocated space and remove it from list */
+    limbo = QTAILQ_LAST(&store->in_limbo, in_limbo_head);
+    QTAILQ_REMOVE(&store->in_limbo, limbo, next);
+    store->nb_in_limbo--;
+
+    offset = limbo->offset;
+    free(limbo);
+
+    /* TODO: save new configuration to disk */
+
+    return offset;
+}
+
+/* This function will forget the oldest incarnation and put it's disk space
+ * offset in the limbo list
+ *
+ * note: this function does not save the new configuration to disk as it's
+ * caller will do it
+ *
+ * store: the hash store to work on
+ */
+static void qcow2_hash_store_forget_oldest_incarnation(QCowHashStore *store)
+{
+    QCowIncarnation *incarnation;
+    QCowLimbo *limbo;
+
+    /* get oldest elements of the incarnation list */
+    incarnation = QTAILQ_LAST(&store->incarnations, incarnations_head);
+
+    /* remove it from incarnation list */
+    QTAILQ_REMOVE(&store->incarnations, incarnation, next);
+    store->nb_incarnations--;
+
+    /* transform the incarnation disk space in a to reincarnate space */
+    limbo = qcow2_hash_store_limbo_new(incarnation->filter_offset);
+
+    /* insert it in the limbo list */
+    QTAILQ_INSERT_HEAD(&store->in_limbo, limbo, next);
+    store->nb_in_limbo++;
+
+    /* free the incarnation from ram */
+    qcow2_hash_store_incarnation_free(incarnation);
+}
+
+/* This functions forget all the incarnations and recycle them as limbo
+ *
+ * note: the called must save the new configuration
+ *
+ * @store: the QCowHashStore to work on
+ */
+void qcow2_hash_store_forget_all_incarnations(QCowHashStore *store)
+{
+    while (store->nb_incarnations) {
+        qcow2_hash_store_forget_oldest_incarnation(store);
+    }
+
+    /* TODO: need to save configuration to disk */
+}
+
+/* This function will add a newly created incarnation to a hash store
+ *
+ * @store:  the hash store to work on
+ * @filter: the in ram filter of the incarnation
+ * @offset: the on disk offset of the incarnation
+ */
+void qcow2_hash_store_add_incarnation(BlockDriverState *bs,
+                                      QCowHashStore *store,
+                                      uint8_t *filter,
+                                      uint64_t offset)
+{
+    QCowIncarnation *incarnation;
+
+    /* behave as a FIFO if the maximum number of incarnation is reached */
+    while (qcow2_hash_store_max_incarnations_reached(bs, store)) {
+        qcow2_hash_store_forget_oldest_incarnation(store);
+    }
+
+    /* create new incarnation */
+    incarnation = qcow2_hash_store_incarnation_new(filter,
+                                                   offset,
+                                                   store->order);
+
+    /* insert at the begining of the list */
+    QTAILQ_INSERT_HEAD(&store->incarnations, incarnation, next);
+    store->nb_incarnations++;
+
+    /* TODO: save new configuration to disk */
+}
+
+/* This function copy the QCowHashInfo from a buffer if the hash is the same
+ *
+ * @hash_info:  the QCowHashInfo we are trying to read the infos for
+ * @buf:        a buffer containing QCowHashInfos
+ * @in_buf_idx: the offset in the buffer in sizeof(QCowHashInfo) chunks
+ * @ret:        true if found else false
+ */
+static bool qcow2_hash_store_get_hash_info(QCowHashInfo *hash_info,
+                                           uint8_t *buf,
+                                           uint64_t in_buf_idx)
+{
+    QCowHashInfo *info;
+    bool found = false;
+
+    info = (QCowHashInfo *) buf;
+
+    /* compare QCowHash to check if the hash info is found */
+    found = !memcmp(&hash_info->hash,
+                    &info[in_buf_idx].hash,
+                    sizeof(QCowHash));
+
+    /* if hash found copy the associated informations */
+    if (found) {
+        hash_info->physical_sect =
+            be64_to_cpu(info[in_buf_idx].physical_sect);
+        hash_info->first_logical_sect =
+            be64_to_cpu(info[in_buf_idx].first_logical_sect);
+    }
+
+    return found;
+}
+
+/* compute disk offset of the cluster to read  */
+static uint64_t qcow2_hash_store_buf_offset(uint64_t hash_table_offset,
+                                            uint64_t in_table_idx)
+{
+    uint32_t nb_in_cluster = qcow2_hash_store_nb_hash_info_per_cluster();
+    uint64_t cluster_index = in_table_idx / nb_in_cluster;
+    return hash_table_offset + cluster_index * HASH_STORE_CLUSTER_SIZE;
+}
+
+/* allocate buffer and read cluster from disk if *buf == NULL
+ *
+ * @offfset: the on disk offset of the cluster to read
+ * @buf:     the pointer to return the allocated area into
+ * @ret:     0 on nop, >0 on success and -errno on error
+ */
+static int qcow2_hash_store_cache_cluster(BlockDriverState *bs,
+                                          uint64_t offset,
+                                          uint8_t **buf)
+{
+    /* buffer already allocated and filed with disk data */
+    if (*buf) {
+        return 0;
+    }
+
+    /* allocate buffer */
+    *buf = qemu_blockalign(bs, HASH_STORE_CLUSTER_SIZE);
+
+    /* read cluster from disk */
+    return bdrv_pread(bs->file,
+                      offset,
+                      *buf,
+                      HASH_STORE_CLUSTER_SIZE);
+}
+
+/* this function probe the filter for a given tag and try to read from disk
+ * the QCowHashInfo if the tag match.
+ *
+ * @store:        the QCowHashStore we work with
+ * @incarnation:  the incarnation to work with
+ * @in_table_idx: the index in the filter and the hash table
+ * @tag:          the tag to try to match
+ * @hash_info:    the QCowHashInfo we are trying to complete
+ * @buf:          a pointer to pass a read buffer between the calls
+ * @ret:          1 on if found, 0 if not, -errno on error
+ */
+static int qcow2_hash_store_probe_read_entry(BlockDriverState *bs,
+                                             QCowHashStore *store,
+                                             QCowIncarnation *incarnation,
+                                             uint64_t in_table_idx,
+                                             uint64_t tag,
+                                             QCowHashInfo *hash_info,
+                                             uint8_t **buf)
+{
+    uint64_t filter_tag;
+    uint64_t in_buf_idx;
+    uint64_t buf_offset;
+    int ret = 0;
+
+    filter_tag = qcow2_hash_store_read_tag_from_filter(store->order,
+                                                       in_table_idx,
+                                                       incarnation->filter);
+
+    /* no matching tag found */
+    if (tag != filter_tag) {
+        return 0;
+    }
+
+    /* the code will allocate the buffer and read the cluster content from disk
+     * the first time it pass here (*buf == NULL)
+     */
+    buf_offset = qcow2_hash_store_buf_offset(incarnation->hash_table_offset,
+                                             in_table_idx);
+    ret = qcow2_hash_store_cache_cluster(bs,
+                                         buf_offset,
+                                         buf);
+
+    /* on read error */
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* check if this entry contains the QCowHashInfo we are looking for
+     * and copy it's associated informations if so
+     */
+    in_buf_idx = in_table_idx % qcow2_hash_store_nb_hash_info_per_cluster();
+    return qcow2_hash_store_get_hash_info(hash_info,
+                                          *buf,
+                                          in_buf_idx);
+}
+
+/* This function will probe the entries of a bucket for a given tag and try
+ * to read the missing QCowHashInfo from disk if an entry is a good candidate
+ *
+ * @store:       the store to work on
+ * @incarnation: the incarnation we are probing
+ * @bucket_idx:  the index of the bucket
+ * @tag:         the tag used for the probe
+ * @hash_info:   the QCowHashInfo we are trying to complete
+ * @ret:         1 on success, 0 if not found, -errno on error
+ */
+static int qcow2_hash_store_probe_read(BlockDriverState *bs,
+                                       QCowHashStore *store,
+                                       QCowIncarnation *incarnation,
+                                       uint64_t bucket_idx,
+                                       uint64_t tag,
+                                       QCowHashInfo *hash_info)
+{
+    uint64_t in_table_idx;
+    uint8_t *buf = NULL; /* NULL by default for qemu_vfree */
+    int ret = 0;
+    int i;
+
+    /* will scan each bucket entry
+     * note that the code handle the case where multiple entry have the same 
tag
+     */
+    for (i = 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) {
+
+        in_table_idx = bucket_idx * QCOW_LOG_STORE_BUCKET_SIZE + i;
+
+        ret = qcow2_hash_store_probe_read_entry(bs,
+                                                store,
+                                                incarnation,
+                                                in_table_idx,
+                                                tag,
+                                                hash_info,
+                                                &buf);
+
+        if (ret) {
+             break;
+        }
+    }
+
+    /* will not free the buffer if not allocated because of NULL */
+    qemu_vfree(buf);
+    return ret;
+}
+
+/* This function check if a hash match the in ram filter and read it from disk
+ *
+ * @store:       the store to work on
+ * @incarnation: the incarnation containing the in ram filter
+ * @hash_info:   the QCowHashInfo to complete
+ * @ret:         1 on success, 0 if not found, -errno on error
+ */
+static int qcow2_hash_store_try_get(BlockDriverState *bs,
+                                    QCowHashStore *store,
+                                    QCowIncarnation *incarnation,
+                                    QCowHashInfo *hash_info)
+{
+    uint64_t h[2];
+    int i;
+    int ret = 0;
+
+    /* incarnation filter probably not loaded from disk yet -> not found */
+    if (!incarnation->filter) {
+        return 0;
+    }
+
+    h[0] = qcow2_dedup_h1(hash_info, store->order);
+    h[1] = qcow2_dedup_h2(hash_info, store->order);
+
+    for (i = 0; i < 2; i++) {
+        /* first check the in ram filter */
+        ret = qcow2_hash_store_probe_read(bs,
+                                          store,
+                                          incarnation,
+                                          h[i],        /* index */
+                                          h[1-i],      /* tag */
+                                          hash_info);
+
+        /* not found in filter */
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/* This function look for a given hash info in the hash store
+ *
+ * @store:     the QCowHashStore to lookup into
+ * @hash_info: the QCowHashInfo to complete : at last the hash must be filled.
+ * @ret:       > 1 on successfull lookup, 0 if not found, -errno on error
+ */
+int qcow2_hash_store_get(BlockDriverState *bs, QCowHashStore *store,
+                         QCowHashInfo *hash_info)
+{
+    QCowIncarnation *incarnation;
+    int ret = 0;
+
+    /* we walk the list of incarnation from the youngest to the oldest */
+    QTAILQ_FOREACH(incarnation, &store->incarnations, next) {
+        /* check if this incarnation contains the hash, try to get it if so */
+        ret = qcow2_hash_store_try_get(bs,
+                                       store,
+                                       incarnation,
+                                       hash_info);
+
+        /* QCowHashInfo found or an error happened */
+        if (ret) {
+            return ret;
+        }
+    }
+
+    /* return not found */
+    return 0;
+}
+
+/* This function loads a single incarnation in ram filter from disk
+ *
+ * @incarnation: the incarnation to load the filer into
+ * @order:       the bit order
+ * @ret:         0 on success, -errno on error
+ */
+static int qcow2_hash_store_load_filter(BlockDriverState *bs,
+                                        QCowIncarnation *incarnation,
+                                        uint32_t order)
+{
+    int ret = 0;
+    uint64_t size = qcow2_hash_store_filter_size(order);
+    uint8_t *buf = qemu_blockalign(bs, size);
+
+    ret = bdrv_pread(bs->file, incarnation->filter_offset, buf, size);
+
+    if (ret < 0) {
+        qemu_vfree(buf);
+        return ret;
+    }
+
+    /* activate filter */
+    incarnation->filter = buf;
+    /* other fields where filled when allocating the incarnation */
+
+    return 0;
+}
+
+/* This structure is used only between the two next functions */
+typedef struct {
+    BlockDriverState *bs;
+    QCowHashStore *store;
+} QCowLoadFilterArgs;
+
+/* This coroutine will load all incarnations filters at startup
+ *
+ * @opaque: a QCowLoadFilterArgs containing a bs and the store
+ */
+static void coroutine_fn qcow2_hash_store_co_load_filters(void *opaque)
+{
+    QCowLoadFilterArgs *args = (QCowLoadFilterArgs *) opaque;
+    QCowIncarnation *incarnation;
+    int ret = 0;
+
+    /* yield so qcow2 will continue to start */
+    qemu_coroutine_yield();
+
+    QTAILQ_FOREACH(incarnation, &args->store->incarnations, next) {
+        ret = qcow2_hash_store_load_filter(args->bs,
+                                           incarnation,
+                                           args->store->order);
+
+        if (ret < 0) {
+            free(args);
+            return;
+        }
+    }
+
+    free(args);
+}
+
+/* This function starts the coroutine responsible for loading the RAM filters
+ *
+ * @store: the QCowHashStore to work on
+ */
+void qcow2_hash_store_load_filters(BlockDriverState *bs,
+                                   QCowHashStore *store)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    /* will pass this as argument to the coroutine */
+    QCowLoadFilterArgs *args = g_new0(QCowLoadFilterArgs, 1);
+    args->bs = bs;
+    args->store = store;
+
+    s->load_filter_co =
+        qemu_coroutine_create(qcow2_hash_store_co_load_filters);
+    qemu_coroutine_enter(s->load_filter_co, args);
+}
+
+/* compute buffer space required to dump the hash store configuration
+ *
+ * @store: the hash store to work on
+ * @ret:   the space the hash store dump will take
+ */
+size_t qcow2_hash_store_dump_size(QCowHashStore *store)
+{
+    return sizeof(store->order) +
+           sizeof(store->nb_incarnations) +
+           sizeof(store->nb_in_limbo) +
+           store->nb_incarnations * sizeof(uint64_t) +
+           store->nb_in_limbo * sizeof(uint64_t);
+}
+
+/* dump a hash store in given buffer and return the buffer space size
+ *
+ * @buf:   the buffer to dump into
+ * @store: the store to dump
+ * @ret:   the used space size
+ */
+size_t qcow2_hash_store_dump(uint8_t *buf, QCowHashStore *store)
+{
+    QCowIncarnation *incarnation;
+    QCowLimbo *limbo;
+    uint32_t *buf32 = (uint32_t *) buf;
+    uint64_t *buf64;
+
+    buf32[0] = cpu_to_be32(store->order);
+    buf32[1] = cpu_to_be32(store->nb_incarnations);
+    buf32[2] = cpu_to_be32(store->nb_in_limbo);
+
+    /* we will write next data here */
+    buf64 = (uint64_t *) (buf + 3 * sizeof(uint32_t));
+
+    /* dump each incarnation offset */
+    QTAILQ_FOREACH(incarnation, &store->incarnations, next) {
+        *buf64 = cpu_to_be64(incarnation->filter_offset);
+        buf64++;
+    }
+
+    /* dump each limbo offset */
+    QTAILQ_FOREACH(limbo, &store->in_limbo, next) {
+        *buf64 = cpu_to_be64(limbo->offset);
+        buf64++;
+    }
+
+    /* return size */
+    return qcow2_hash_store_dump_size(store);
+}
+
+/* parse the given buffer into a QCowHashStore
+ *
+ * @store:   the hash store to parse into
+ * @buf:     the buffer to parse from
+ * @buf_end: the end of the buffer to parse from
+ * @ret:     the parsed size or -1 on error
+ */
+size_t qcow2_hash_store_parse(QCowHashStore *store,
+                              uint8_t *buf,
+                              uint8_t *buf_end)
+{
+    QCowIncarnation *incarnation;
+    QCowLimbo *limbo;
+    uint32_t *buf32 = (uint32_t *) buf;
+    uint64_t *buf64, *buf64_end;
+    uint32_t i;
+
+    buf64_end = (uint64_t *) buf_end;
+
+    /* reset hash store */
+    qcow2_hash_store_reset(store,
+                           be32_to_cpu(buf32[0]),  /* order */
+                           be32_to_cpu(buf32[1]),  /* nb_incarnations */
+                           be32_to_cpu(buf32[2])); /* nb_in_limbo */
+
+    /* we will read from here */
+    buf64 = (uint64_t *) (buf + 3 * sizeof(uint32_t));
+
+    /* instanciate each incarnation */
+    for (i = 0;
+         i < store->nb_incarnations && buf64 <= (buf64_end - 1);
+         i++) {
+        /* the filter will be loaded later in a coroutine and NULL filter
+         * won't harm
+         */
+        incarnation = qcow2_hash_store_incarnation_new(NULL,/* filter */
+                          be64_to_cpu(*buf64),            /* offset */
+                          store->order);
+        QTAILQ_INSERT_HEAD(&store->incarnations, incarnation, next);
+        buf64++;
+    }
+
+    /* buffer too small */
+    if (i < store->nb_incarnations) {
+        return -1;
+    }
+
+    /* instanciate each limbo disk space */
+    for (i = 0;
+         i < store->nb_in_limbo && buf64 <= (buf64_end - 1);
+         i++) {
+        limbo = qcow2_hash_store_limbo_new(be64_to_cpu(*buf64));
+        QTAILQ_INSERT_HEAD(&store->in_limbo, limbo, next);
+        buf64++;
+    }
+
+    if (i < store->nb_in_limbo) {
+        return -1;
+    }
+
+    return buf_end - buf;
+}
diff --git a/block/qcow2-log-store.c b/block/qcow2-log-store.c
index 24ae310..b0ebc28 100644
--- a/block/qcow2-log-store.c
+++ b/block/qcow2-log-store.c
@@ -283,6 +283,14 @@ static uint64_t 
qcow2_log_store_tag_by_bucket_idx(QCowLogStore *store,
     return 0;
 }
 
+static uint64_t qcow2_log_store_tag_by_table_idx(QCowLogStore *store,
+                                                 QCowHashInfo *hash_info,
+                                                 uint64_t in_table_idx)
+{
+    return qcow2_log_store_tag_by_bucket_idx(store, hash_info, in_table_idx /
+                                             QCOW_LOG_STORE_BUCKET_SIZE);
+}
+
 /* This function calculate the order (number of bit) required
  *
  * @min_nb_entries_required: minimal number of entry the log store must handle
@@ -997,6 +1005,240 @@ int 
qcow2_log_store_rebuild_from_journal(BlockDriverState *bs,
     return qcow2_journal_resume(bs, &store->journal, offset);
 }
 
+/* This function write the signifiant portion of a tag into a in ram filter
+ *
+ * This function is here to cope with the fact that the filter elements size
+ * will vary to keep the false lookup probability low.
+ * The bulk of the code will manipulate uint64_t variable and utility functions
+ * like this one will be used to write into and read from the in ram filter.
+ *
+ * The function takes care of endianess issues as the filter will be stored on
+ * disk.
+ *
+ * @store:  the QCowLogStore to work with
+ * @index:  the index of the element to write
+ * @tag:    the tag that will act as a filter element
+ * @filter: the in ram filter to write to
+ */
+static void qcow2_log_store_write_tag_to_filter(QCowLogStore *store,
+                                                uint64_t index,
+                                                uint64_t tag,
+                                                uint8_t *filter)
+{
+    uint16_t *filter_16 = (uint16_t *) filter;
+    uint32_t *filter_32 = (uint32_t *) filter;
+    uint64_t *filter_64 = (uint64_t *) filter;
+
+    switch (qcow2_hash_store_get_tag_size(store->order)) {
+    case 2:
+        filter_16[index] = cpu_to_be16((uint16_t) tag);
+        break;
+    case 4:
+        filter_32[index] = cpu_to_be32((uint32_t) tag);
+        break;
+    case 8:
+        filter_64[index] = cpu_to_be64((uint64_t) tag);
+        break;
+    default:
+        /* should never pass here given qcow2_hash_store_get_tag_size results 
*/
+        abort();
+    }
+}
+
+/* This function create a ram filter from a QCowLogStore and write it to disk
+ *
+ * @store:  the QCowLogStore to create the filter from
+ * @offset: the on disk offset to write the filter to
+ * @filter: the filter pointer to allocate, write and return the data into
+ * @ret:    0 on success, -errno on error
+ */
+int qcow2_log_store_freeze_filter(BlockDriverState *bs,
+                                  QCowLogStore *store,
+                                  uint64_t offset,
+                                  uint8_t **filter)
+{
+    QCowHashInfo *entry;
+    uint64_t i, size, tag;
+    int ret = 0;
+
+    /* allocate the filter with an alignement suitable for disk writes */
+    size = qcow2_hash_store_filter_size(store->order);
+    *filter = qemu_blockalign(bs, size);
+    /* zero allocated memory */
+    memset(*filter, 0, size);
+
+    /* build the filter */
+    for (i = 0; i < qcow2_log_store_nb_entries(store->order); i++) {
+        /* only store used entries */
+        if (!qcow2_log_store_is_used_by_table_idx(store->hash_table, i)) {
+            continue;
+        }
+
+        entry = &store->hash_table[i];
+        tag = qcow2_log_store_tag_by_table_idx(store, entry, i); 
+        qcow2_log_store_write_tag_to_filter(store,
+                                            i,
+                                            tag,
+                                            *filter);
+    }
+
+    /* we should not to write on disk if vm is suspended (migration) */
+    co_sleep_ns(vm_clock, 1);
+
+    /* dump the filter to disk */
+    ret = bdrv_pwrite(bs->file, offset, *filter, size);
+
+    if (ret < 0) {
+        goto error_exit;
+    }
+
+    return 0;
+
+error_exit:
+    qemu_vfree(*filter);
+    *filter = NULL;
+    return ret;
+}
+
+/* Write the buffer used to freeze the log store to disk and reset buffer state
+ *
+ * @store: the QCowHashStore to work on
+ * @ret:   0 on succes, -errno on error
+ */
+static int qcow2_log_store_write_freeze_buffer(BlockDriverState *bs,
+                                               QCowLogStore *store)
+{
+    int ret = 0;
+
+    /* we should not to write on disk if vm is suspended (migration) */
+    co_sleep_ns(vm_clock, 1);
+
+    /* write to disk */
+    ret = bdrv_pwrite(bs->file,
+                      store->write_buf_offset,
+                      store->write_buf,
+                      HASH_STORE_CLUSTER_SIZE);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* prepare for next batch of QCowHashInfo */
+    memset(store->write_buf, 0, HASH_STORE_CLUSTER_SIZE);
+    store->write_buf_offset += HASH_STORE_CLUSTER_SIZE;
+    store->in_buf_offset = 0;
+
+    /* success */
+    return 0;
+}
+
+static bool qcow_log_store_freeze_buffer_full(QCowLogStore *store)
+{
+    /* we make sure that there is not bucket splitted between two clusters */
+    uint64_t count = store->in_buf_offset / sizeof(QCowHashInfo);
+    return count == qcow2_hash_store_nb_hash_info_per_cluster();
+}
+
+static bool qcow2_log_store_is_entry_at_right_place(QCowLogStore *store,
+                                                    QCowHashInfo *entry,
+                                                    uint64_t in_table_idx)
+{
+    uint64_t h[2];
+    uint64_t bucket_idx = in_table_idx / QCOW_LOG_STORE_BUCKET_SIZE;
+
+    h[0] = qcow2_dedup_h1(entry, store->order);
+    h[1] = qcow2_dedup_h2(entry, store->order);
+
+    /* check that the bucket index match one of the two sub hashes */
+    return (bucket_idx == h[0]) || (bucket_idx == h[1]);
+}
+
+/* Append a given log store entry to a frozen hash table suitable for hash 
store
+ *
+ * @store: the QCowLogStore to work on
+ * @index: the index of the entry in the QCowLogStore hash table
+ * @ret:   0 on succes, -errno on error
+ */
+static int qcow2_log_store_add_entry_to_frozen_hash_table(BlockDriverState *bs,
+                                                          QCowLogStore *store,
+                                                          uint64_t 
in_table_idx)
+{
+    QCowHashInfo hash_entry;
+    int ret = 0;
+    bool used = qcow2_log_store_is_used_by_table_idx(store->hash_table,
+                                                     in_table_idx);
+
+    /* get hash info from table if entry is used */
+    if (used) {
+        qcow2_log_store_copy_from_hash_table(&hash_entry,
+                                             store->hash_table,
+                                             in_table_idx);
+        hash_entry.physical_sect = cpu_to_be64(hash_entry.physical_sect);
+        hash_entry.first_logical_sect =
+            cpu_to_be64(hash_entry.first_logical_sect);
+    }
+
+    /* write buffer to disk if needed */
+    if (qcow_log_store_freeze_buffer_full(store)) {
+        ret = qcow2_log_store_write_freeze_buffer(bs, store);
+    }
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* validate position of the entry in the hash table */
+    if (used) {
+         assert(qcow2_log_store_is_entry_at_right_place(store,
+                                                        &hash_entry,
+                                                        in_table_idx));
+    }
+
+    /* the entry is used copy the journal entry QCowHashInfo */
+    if (used) {
+        memcpy(store->write_buf + store->in_buf_offset,
+               &hash_entry,
+               sizeof(QCowHashInfo));
+    } else {
+        memset(store->write_buf + store->in_buf_offset, 0,
+               sizeof(QCowHashInfo));
+    }
+
+    store->in_buf_offset += sizeof(QCowHashInfo);
+    return 0;
+}
+
+/* This function freeze the log store into an incarnation hash table
+ *
+ * @store:  the store to work on
+ * @offset: the on disk start offset of the hash table
+ * @ret:    0 on succes, -errno on error
+ */
+int qcow2_log_store_freeze_hash_table(BlockDriverState *bs,
+                                      QCowLogStore *store,
+                                      uint64_t offset)
+{
+    uint64_t i;
+    int ret = 0;
+
+    /* initialize data required to do the freeze */
+    memset(store->write_buf, 0, HASH_STORE_CLUSTER_SIZE);
+    store->write_buf_offset = offset;
+    store->in_buf_offset = 0;
+
+    /* build the hash table */
+    for (i = 0; i < qcow2_log_store_nb_entries(store->order); i++) {
+        ret = qcow2_log_store_add_entry_to_frozen_hash_table(bs, store, i);
+
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    /* write the last buffer to disk */
+    return qcow2_log_store_write_freeze_buffer(bs, store);
+}
+
 /* the on disk size of a log store dump */
 size_t qcow2_log_store_dump_size(void)
 {
diff --git a/block/qcow2.h b/block/qcow2.h
index d347471..d0037bf 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -79,6 +79,7 @@
                                         * on the log store size
                                         */
 #define QCOW2_NB_INCARNATION_GOAL  128 /* targeted number of incarnation */
+#define QCOW2_STORE_CO_SLEEP_NS    (100 * 1000 * 1000) /* 0.1 s in ns */
 
 #define QCOW_DEDUP_DIRTY 1 /* dirty flag in the qcow2 header extension */
 
@@ -657,8 +658,45 @@ int qcow2_log_store_flush(BlockDriverState *bs, 
QCowLogStore *store);
 int qcow2_log_store_stop(BlockDriverState *bs, QCowLogStore *store);
 int qcow2_log_store_rebuild_from_journal(BlockDriverState *bs,
                                          QCowLogStore *store);
+int qcow2_log_store_freeze_filter(BlockDriverState *bs,
+                                  QCowLogStore *store,
+                                  uint64_t offset,
+                                  uint8_t **filter);
+int qcow2_log_store_freeze_hash_table(BlockDriverState *bs,
+                                      QCowLogStore *store,
+                                      uint64_t offset);
 size_t qcow2_log_store_dump_size(void);
 size_t qcow2_log_store_dump(uint8_t *buf, QCowLogStore *store);
 size_t qcow2_log_store_parse(QCowLogStore *store, uint8_t *buf);
 
+/* qcow2-hash-store.c functions */
+
+int qcow2_hash_store_get_tag_size(uint32_t order);
+uint64_t qcow2_hash_store_nb_hash_info_per_cluster(void);
+uint64_t qcow2_hash_store_incarnation_size(uint32_t order);
+void qcow2_hash_store_reset(QCowHashStore *store,
+                            uint32_t order,
+                            uint32_t nb_incarnations,
+                            uint32_t nb_in_limbo);
+void qcow2_hash_store_init(QCowHashStore *store, uint32_t order);
+void qcow2_hash_store_cleanup(QCowHashStore *store);
+uint64_t qcow2_hash_store_filter_size(uint32_t order);
+void qcow2_hash_store_incarnation_free(QCowIncarnation *incarnation);
+int64_t qcow2_hash_store_get_incarnation_offset(BlockDriverState *bs,
+                                                QCowHashStore *store);
+void qcow2_hash_store_forget_all_incarnations(QCowHashStore *store);
+void qcow2_hash_store_add_incarnation(BlockDriverState *bs,
+                                      QCowHashStore *hash_store,
+                                      uint8_t *filter,
+                                      uint64_t offset);
+int qcow2_hash_store_get(BlockDriverState *bs, QCowHashStore *store,
+                         QCowHashInfo *hash_info);
+void qcow2_hash_store_load_filters(BlockDriverState *bs,
+                                   QCowHashStore *store);
+size_t qcow2_hash_store_dump_size(QCowHashStore *store);
+size_t qcow2_hash_store_dump(uint8_t *buf, QCowHashStore *store);
+size_t qcow2_hash_store_parse(QCowHashStore *store,
+                                uint8_t *buf,
+                                uint8_t *buf_end);
+
 #endif
-- 
1.7.10.4


Reply via email to