Signed-off-by: Benoit Canet <ben...@irqsave.net> --- block/qcow2-dedup.c | 436 +++++++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 5 + 2 files changed, 441 insertions(+)
diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c index 4e99eb1..5901749 100644 --- a/block/qcow2-dedup.c +++ b/block/qcow2-dedup.c @@ -117,3 +117,439 @@ fail: *data = NULL; return ret; } + +/* + * Build a QCowHashNode structure + * + * @hash: the given hash + * @physical_sect: the cluster offset in the QCOW2 file + * @first_logical_sect: the first logical cluster offset written + * @ret: the build QCowHashNode + */ +static QCowHashNode *qcow2_dedup_build_qcow_hash_node(QCowHash *hash, + uint64_t physical_sect, + uint64_t first_logical_sect) +{ + QCowHashNode *hash_node; + + hash_node = g_new0(QCowHashNode, 1); + memcpy(hash_node->hash.data, hash->data, HASH_LENGTH); + hash_node->physical_sect = physical_sect; + hash_node->first_logical_sect = first_logical_sect; + + return hash_node; +} + +/* + * Compute the hash of a given cluster + * + * @data: a buffer containing the cluster data + * @hash: a QCowHash where to store the computed hash + * @ret: 0 on success, negative on error + */ +static int qcow2_compute_cluster_hash(BlockDriverState *bs, + QCowHash *hash, + uint8_t *data) +{ + return 0; +} + +/* + * Get a QCowHashNode corresponding to a cluster data + * + * @phash: if phash can be used no hash is computed + * @data: a buffer containing the cluster + * @nb_clusters_processed: the number of cluster to skip in the buffer + * @err: Error code if any + * @ret: QCowHashNode of the duplicated cluster or NULL if not found + */ +static QCowHashNode *qcow2_get_hash_node_for_cluster(BlockDriverState *bs, + QcowPersistantHash *phash, + uint8_t *data, + int nb_clusters_processed, + int *err) +{ + BDRVQcowState *s = bs->opaque; + int ret = 0; + *err = 0; + + /* no hash has been provided compute it and store it for later usage */ + if (!phash->reuse) { + ret = qcow2_compute_cluster_hash(bs, + &phash->hash, + data + + nb_clusters_processed * + s->cluster_size); + } + + /* do not reuse the hash anymore if it was precomputed */ + phash->reuse = false; + + if (ret < 0) { + *err = ret; + return NULL; + } + + return g_tree_lookup(s->dedup_tree_by_hash, &phash->hash); +} + +/* + * Build a QCowHashNode from a given QCowHash and insert it into the tree + * + * @hash: the given QCowHash + */ +static void qcow2_build_and_insert_hash_node(BlockDriverState *bs, + QCowHash *hash) +{ + BDRVQcowState *s = bs->opaque; + QCowHashNode *hash_node; + + /* build the hash node with QCOW_FLAG_EMPTY as offsets so we will remember + * to fill these field later with real values. + */ + hash_node = qcow2_dedup_build_qcow_hash_node(hash, + QCOW_FLAG_EMPTY, + QCOW_FLAG_EMPTY); + g_tree_insert(s->dedup_tree_by_hash, &hash_node->hash, hash_node); +} + +/* + * Helper used to build a QCowHashElement + * + * @hash: the QCowHash to use + * @ret: a newly allocated QCowHashElement containing the given hash + */ +static QCowHashElement *qcow2_build_dedup_hash(QCowHash *hash) +{ + QCowHashElement *dedup_hash; + dedup_hash = g_new0(QCowHashElement, 1); + memcpy(dedup_hash->hash.data, hash->data, HASH_LENGTH); + return dedup_hash; +} + +/* + * Helper used to link a deduplicated cluster in the l2 + * + * @logical_sect: the cluster sector seen by the guest + * @physical_sect: the cluster sector in the QCOW2 file + * @overwrite: true if we must overwrite the L2 table entry + * @ret: + */ +static int qcow2_dedup_link_l2(BlockDriverState *bs, + uint64_t logical_sect, + uint64_t physical_sect, + bool overwrite) +{ + QCowL2Meta m = { + .alloc_offset = physical_sect << 9, + .offset = logical_sect << 9, + .nb_clusters = 1, + .nb_available = 0, + .cow_start = { + .offset = 0, + .nb_sectors = 0, + }, + .cow_end = { + .offset = 0, + .nb_sectors = 0, + }, + .oflag_copied = false, + .overwrite = overwrite, + }; + return qcow2_alloc_cluster_link_l2(bs, &m); +} + +/* Clear the QCOW_OFLAG_COPIED from the first L2 entry written for a physical + * cluster. + * + * @hash_node: the duplicated hash node + * @ret: 0 on success, negative on error + */ +static int qcow2_clear_l2_copied_flag_if_needed(BlockDriverState *bs, + QCowHashNode *hash_node) +{ + int ret = 0; + uint64_t first_logical_sect = hash_node->first_logical_sect; + + /* QCOW_OFLAG_COPIED already cleared -> do nothing */ + if (!(first_logical_sect & QCOW_FLAG_FIRST)) { + return 0; + } + + /* note : QCOW_FLAG_FIRST == QCOW_OFLAG_COPIED */ + first_logical_sect &= ~QCOW_FLAG_FIRST; + + /* overwrite first L2 entry to clear QCOW_FLAG_COPIED */ + ret = qcow2_dedup_link_l2(bs, first_logical_sect, + hash_node->physical_sect, + true); + + if (ret < 0) { + return ret; + } + + /* remember that we dont't need to clear QCOW_OFLAG_COPIED again */ + hash_node->first_logical_sect &= first_logical_sect; + + return 0; +} + +/* This function deduplicate a cluster + * + * @logical_sect: The logical sector of the write + * @hash_node: The duplicated cluster hash node + * @ret: 0 on success, negative on error + */ +static int qcow2_deduplicate_cluster(BlockDriverState *bs, + uint64_t logical_sect, + QCowHashNode *hash_node) +{ + BDRVQcowState *s = bs->opaque; + int ret = 0; + + /* create new L2 entry */ + ret = qcow2_dedup_link_l2(bs, logical_sect, + hash_node->physical_sect, + false); + + if (ret < 0) { + return ret; + } + + /* Increment the refcount of the cluster */ + return update_refcount(bs, + (hash_node->physical_sect / + s->cluster_sectors) << s->cluster_bits, + 1, 1); +} + +/* This function tries to deduplicate a given cluster. + * + * @sector_num: the logical sector number we are trying to deduplicate + * @phash: Used instead of computing the hash if provided + * @data: the buffer in which to look for a duplicated cluster + * @nb_clusters_processed: the number of cluster that must be skipped in data + * @ret: ret < 0 on error, 1 on deduplication else 0 + */ +static int qcow2_try_dedup_cluster(BlockDriverState *bs, + QcowPersistantHash *phash, + uint64_t sector_num, + uint8_t *data, + int nb_clusters_processed) +{ + BDRVQcowState *s = bs->opaque; + int ret = 0; + QCowHashNode *hash_node; + uint64_t logical_sect; + uint64_t existing_physical_offset; + int pnum = s->cluster_sectors; + + /* search the tree for duplicated cluster */ + hash_node = qcow2_get_hash_node_for_cluster(bs, + phash, + data, + nb_clusters_processed, + &ret); + + /* we won't reuse the hash on error */ + if (ret < 0) { + return ret; + } + + /* if cluster is not duplicated store hash for later usage */ + if (!hash_node) { + qcow2_build_and_insert_hash_node(bs, &phash->hash); + return 0; + } + + logical_sect = sector_num & ~(s->cluster_sectors - 1); + ret = qcow2_get_cluster_offset(bs, logical_sect << 9, + &pnum, &existing_physical_offset); + + if (ret < 0) { + return ret; + } + + /* if we are rewriting the same cluster at the same place do nothing */ + if (existing_physical_offset == hash_node->physical_sect << 9) { + return 1; + } + + /* take care of not having refcount > 1 and QCOW_OFLAG_COPIED at once */ + ret = qcow2_clear_l2_copied_flag_if_needed(bs, hash_node); + + if (ret < 0) { + return ret; + } + + /* do the deduplication */ + ret = qcow2_deduplicate_cluster(bs, logical_sect, + hash_node); + + if (ret < 0) { + return ret; + } + + return 1; +} + + +static void add_hash_to_undedupable_list(BlockDriverState *bs, + QCowDedupState *ds) +{ + /* memorise hash for later storage in gtree and disk */ + QCowHashElement *dedup_hash = qcow2_build_dedup_hash(&ds->phash.hash); + QTAILQ_INSERT_TAIL(&ds->undedupables, dedup_hash, next); +} + +static int qcow2_dedup_starting_from_begining(BlockDriverState *bs, + QCowDedupState *ds, + uint64_t sector_num, + uint8_t *data, + int left_to_process) +{ + BDRVQcowState *s = bs->opaque; + int i; + int ret = 0; + + for (i = 0; i < left_to_process; i++) { + ret = qcow2_try_dedup_cluster(bs, + &ds->phash, + sector_num + i * s->cluster_sectors, + data, + ds->nb_clusters_processed + i); + + if (ret < 0) { + return ret; + } + + /* stop if a cluster has not been deduplicated */ + if (ret != 1) { + break; + } + } + + return i; +} + +static int qcow2_count_next_non_dedupable_clusters(BlockDriverState *bs, + QCowDedupState *ds, + uint8_t *data, + int left_to_process) +{ + int i; + int ret = 0; + QCowHashNode *hash_node; + + for (i = 0; i < left_to_process; i++) { + hash_node = qcow2_get_hash_node_for_cluster(bs, + &ds->phash, + data, + ds->nb_clusters_processed + i, + &ret); + + if (ret < 0) { + return ret; + } + + /* found a duplicated cluster : stop here */ + if (hash_node) { + break; + } + + qcow2_build_and_insert_hash_node(bs, &ds->phash.hash); + add_hash_to_undedupable_list(bs, ds); + } + + return i; +} + + +/* Deduplicate all the cluster that can be deduplicated. + * + * Next it compute the number of non deduplicable sectors to come while storing + * the hashes of these sectors in a linked list for later usage. + * Then it compute the first duplicated cluster hash that come after non + * deduplicable cluster, this hash will be used at next call of the function + * + * @ds: a structure containing the state of the deduplication + * for this write request + * @sector_num: The logical sector + * @data: the buffer containing the data to deduplicate + * @data_nr: the size of the buffer in sectors + * + */ +int qcow2_dedup(BlockDriverState *bs, + QCowDedupState *ds, + uint64_t sector_num, + uint8_t *data, + int data_nr) +{ + BDRVQcowState *s = bs->opaque; + int ret = 0; + int deduped_clusters_nr = 0; + int left_to_process; + int begining_index; + + begining_index = sector_num & (s->cluster_sectors - 1); + + left_to_process = (data_nr / s->cluster_sectors) - + ds->nb_clusters_processed; + + /* start deduplicating all that can be cluster after cluster */ + ret = qcow2_dedup_starting_from_begining(bs, + ds, + sector_num, + data, + left_to_process); + + if (ret < 0) { + return ret; + } + + deduped_clusters_nr = ret; + + left_to_process -= ret; + ds->nb_clusters_processed += ret; + + /* We deduped everything till the end */ + if (!left_to_process) { + ds->nb_undedupable_sectors = 0; + goto exit; + } + + /* skip and account the first undedupable cluster found */ + left_to_process--; + ds->nb_clusters_processed++; + ds->nb_undedupable_sectors += s->cluster_sectors; + + add_hash_to_undedupable_list(bs, ds); + + /* Count how many non duplicated sector can be written and memorize hashes + * to write them after data has reached disk. + */ + ret = qcow2_count_next_non_dedupable_clusters(bs, + ds, + data, + left_to_process); + + if (ret < 0) { + return ret; + } + + left_to_process -= ret; + ds->nb_clusters_processed += ret; + ds->nb_undedupable_sectors += ret * s->cluster_sectors; + + /* remember to reuse the last hash computed at new qcow2_dedup call */ + if (left_to_process) { + ds->phash.reuse = true; + } + +exit: + if (!deduped_clusters_nr) { + return 0; + } + + return deduped_clusters_nr * s->cluster_sectors - begining_index; +} diff --git a/block/qcow2.h b/block/qcow2.h index 95bf848..46a5800 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -463,5 +463,10 @@ int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs, int sectors_nr, uint8_t **dedup_cluster_data, int *dedup_cluster_data_nr); +int qcow2_dedup(BlockDriverState *bs, + QCowDedupState *ds, + uint64_t sector_num, + uint8_t *data, + int data_nr); #endif -- 1.7.10.4