When a write request hits a cluster whose L2 entry does not have the
COPIED flag set, allocate a new cluster and write the original data,
with the requested modification applied, to the new cluster. This
also adds support for writing to qcow2 compressed images.

Signed-off-by: Lan Tianyu <tianyu....@intel.com>
---
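
For reviewers, a quick standalone illustration of the index arithmetic
the new helpers rely on (a minimal sketch; it assumes two-byte refcount
entries, i.e. QCOW_REFCOUNT_BLOCK_SHIFT == 1 as in
tools/kvm/include/kvm/qcow.h, and the cluster_bits/clust_idx values are
made-up examples, not taken from a real image):

        #include <inttypes.h>
        #include <stdio.h>

        #define QCOW_REFCOUNT_BLOCK_SHIFT 1     /* two-byte refcount entries */

        int main(void)
        {
                unsigned int cluster_bits = 16;         /* 64 KiB clusters */
                uint64_t cluster_size = 1ULL << cluster_bits;
                uint64_t clust_idx = 100000;
                uint64_t size = 3 * cluster_size + 1;   /* spills into a 4th cluster */

                /* entry index within a refcount block, as in qcow_get_refcount() */
                uint64_t rfb_idx = clust_idx &
                        ((1ULL << (cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1);

                /* clusters needed for a request, as in qcow_alloc_clusters() */
                uint64_t clust_num = (size + cluster_size - 1) >> cluster_bits;

                printf("rfb_idx = %" PRIu64 ", clust_num = %" PRIu64 "\n",
                       rfb_idx, clust_num);
                return 0;
        }

With 64 KiB clusters this prints "rfb_idx = 1696, clust_num = 4": each
refcount block covers 32768 clusters, and a request one byte past three
clusters needs a fourth.
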
 tools/kvm/disk/qcow.c        |  322 ++++++++++++++++++++++++++++--------------
 tools/kvm/include/kvm/qcow.h |    2 +
 2 files changed, 218 insertions(+), 106 deletions(-)

diff --git a/tools/kvm/disk/qcow.c b/tools/kvm/disk/qcow.c
index 680b37d..2b9af73 100644
--- a/tools/kvm/disk/qcow.c
+++ b/tools/kvm/disk/qcow.c
@@ -122,9 +122,6 @@ static int cache_table(struct qcow *q, struct qcow_l2_table *c)
                 */
                lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);
 
-               if (qcow_l2_cache_write(q, lru) < 0)
-                       goto error;
-
                /* Remove the node from the cache */
                rb_erase(&lru->node, r);
                list_del_init(&lru->list);
@@ -728,35 +725,110 @@ error_free_rfb:
        return NULL;
 }
 
-/*
- * QCOW file might grow during a write operation. Not only data but metadata is
- * also written at the end of the file. Therefore it is necessary to ensure
- * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to
- * synchronize the in-core state of QCOW image to disk.
- *
- * We also try to restore the image to a consistent state if the metdata
- * operation fails. The two metadat operations are: level 1 and level 2 table
- * update. If either of them fails the image is truncated to a consistent state.
+static u16 qcow_get_refcount(struct qcow *q, u64 clust_idx)
+{
+       struct qcow_refcount_block *rfb = NULL;
+       struct qcow_header *header = q->header;
+       u64 rfb_idx;
+
+       rfb = qcow_read_refcount_block(q, clust_idx);
+       if (!rfb) {
+               pr_warning("error while reading refcount table");
+               return -1;
+       }
+
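+       /*
+        * Index of this cluster's entry within its refcount block: each
+        * block holds 2^(cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)
+        * two-byte refcounts.
+        */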
+       rfb_idx = clust_idx & (((1ULL <<
+               (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
+
+       if (rfb_idx >= rfb->size) {
+               pr_warning("refcount block index out of bounds");
+               return -1;
+       }
+
+       return be16_to_cpu(rfb->entries[rfb_idx]);
+}
+
+static int update_cluster_refcount(struct qcow *q, u64 clust_idx, u16 append)
+{
+       struct qcow_refcount_block *rfb = NULL;
+       struct qcow_header *header = q->header;
+       u16 refcount;
+       u64 rfb_idx;
+
+       rfb = qcow_read_refcount_block(q, clust_idx);
+       if (!rfb) {
+               pr_warning("error while reading refcount table");
+               return -1;
+       }
+
+       rfb_idx = clust_idx & (((1ULL <<
+               (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
+       if (rfb_idx >= rfb->size) {
+               pr_warning("refcount block index out of bounds");
+               return -1;
+       }
+
+       refcount = be16_to_cpu(rfb->entries[rfb_idx]) + append;
+       rfb->entries[rfb_idx] = cpu_to_be16(refcount);
+       rfb->dirty = 1;
+
+       /* Write the refcount block */
+       write_refcount_block(q, rfb);
+
+       /* If the refcount dropped to zero, move the scan hint back */
+       if (!refcount && clust_idx < q->free_clust_idx)
+               q->free_clust_idx = clust_idx;
+
+       return 0;
+}
+
+/*
+ * Allocate clusters according to the size. Find a position that
+ * can satisfy the size. free_clust_idx is initialized to zero and
+ * records the last scan position.
+ */
+static u64 qcow_alloc_clusters(struct qcow *q, u64 size)
+{
+       struct qcow_header *header = q->header;
+       u16 clust_refcount;
+       u64 clust_idx, i;
+       u64 clust_num;
+
+       clust_num = (size + (q->cluster_size - 1)) >> header->cluster_bits;
+
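+       /*
+        * Scan forward from free_clust_idx for clust_num consecutive
+        * free clusters; restart the run whenever an in-use cluster
+        * is found.
+        */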
+again:
+       for (i = 0; i < clust_num; i++) {
+               clust_idx = q->free_clust_idx++;
+               clust_refcount = qcow_get_refcount(q, clust_idx);
+               if (clust_refcount == (u16)-1)
+                       return -1;
+               else if (clust_refcount > 0)
+                       goto again;
+       }
+
+       for (i = 0; i < clust_num; i++)
+               update_cluster_refcount(q,
+                       q->free_clust_idx - clust_num + i, 1);
+
+       return (q->free_clust_idx - clust_num) << header->cluster_bits;
+}
+
+/*
+ * Get the l2 table. If the table has been copied, read it directly.
+ * Otherwise allocate a new cluster and copy the existing table (if
+ * any) into it.
  */
-static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
+static int get_cluster_table(struct qcow *q, u64 offset,
+       struct qcow_l2_table **result_l2t, u64 *result_l2_index)
 {
        struct qcow_header *header = q->header;
        struct qcow_l1_table *l1t = &q->table;
        struct qcow_l2_table *l2t;
-       u64 clust_start;
-       u64 clust_flags;
-       u64 l2t_offset;
-       u64 clust_off;
-       u64 l2t_size;
-       u64 clust_sz;
        u64 l1t_idx;
+       u64 l2t_offset;
        u64 l2t_idx;
-       u64 f_sz;
-       u64 len;
+       u64 l2t_size;
+       u64 l2t_new_offset;
 
-       l2t             = NULL;
-       l2t_size        = 1 << header->l2_bits;
-       clust_sz        = 1 << header->cluster_bits;
+       l2t_size = 1 << header->l2_bits;
 
        l1t_idx = get_l1_index(q, offset);
        if (l1t_idx >= l1t->table_size)
@@ -766,122 +838,149 @@ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src
        if (l2t_idx >= l2t_size)
                return -1;
 
-       clust_off = get_cluster_offset(q, offset);
-       if (clust_off >= clust_sz)
-               return -1;
-
-       len = clust_sz - clust_off;
-       if (len > src_len)
-               len = src_len;
-
-       mutex_lock(&q->mutex);
-
        l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
-       if (l2t_offset & QCOW2_OFLAG_COMPRESSED) {
-               pr_warning("compressed clusters are not supported");
-               goto error;
-       }
-       if (!(l2t_offset & QCOW2_OFLAG_COPIED)) {
-               pr_warning("L2 copy-on-write clusters are not supported");
-               goto error;
-       }
-
-       l2t_offset &= QCOW2_OFFSET_MASK;
-       if (l2t_offset) {
-               /* read and cache l2 table */
+       if (l2t_offset & QCOW2_OFLAG_COPIED) {
+               l2t_offset &= ~QCOW2_OFLAG_COPIED;
                l2t = qcow_read_l2_table(q, l2t_offset);
                if (!l2t)
                        goto error;
        } else {
-               l2t = new_cache_table(q, l2t_offset);
+               l2t_new_offset = qcow_alloc_clusters(q, l2t_size * sizeof(u64));
+               if (l2t_new_offset == (u64)-1)
+                       goto error;
+
+               l2t = new_cache_table(q, l2t_new_offset);
                if (!l2t)
                        goto error;
 
-               /* Capture the state of the consistent QCOW image */
-               f_sz = file_size(q->fd);
-               if (!f_sz)
-                       goto free_cache;
+               if (l2t_offset)
+                       qcow2_read_cluster(q, l2t_offset, l2t->table,
+                               l2t_size * sizeof(u64));
+               else
+                       memset(l2t->table, 0x00, l2t_size * sizeof(u64));
 
-               /* Write the l2 table of 0's at the end of the file */
-               l2t_offset = qcow_write_l2_table(q, l2t->table);
-               if (!l2t_offset)
+               /* Write the l2 table */
+               l2t->dirty = 1;
+               if (qcow_l2_cache_write(q, l2t) < 0)
                        goto free_cache;
 
-               if (cache_table(q, l2t) < 0) {
-                       if (ftruncate(q->fd, f_sz) < 0)
-                               goto free_cache;
+               /* Cache the l2 table */
+               cache_table(q, l2t);
 
-                       goto free_cache;
-               }
+               /* Update the l1 table */
+               l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_new_offset
+                       | QCOW2_OFLAG_COPIED);
 
-               /* Update the in-core entry */
-               l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
+               if (pwrite_in_full(q->fd, l1t->l1_table,
+                       l1t->table_size * sizeof(u64),
+                       header->l1_table_offset) < 0)
+                       goto error;
        }
 
-       /* Capture the state of the consistent QCOW image */
-       f_sz            = file_size(q->fd);
-       if (!f_sz)
-               goto error;
+       *result_l2t = l2t;
+       *result_l2_index = l2t_idx;
 
-       clust_start = be64_to_cpu(l2t->table[l2t_idx]);
+       return 0;
 
-       clust_flags = clust_start & QCOW2_OFLAGS_MASK;
-       if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
-               pr_warning("compressed clusters are not supported");
+free_cache:
+       free(l2t);
+error:
+       return -1;
+}
+
+/*
+ * If the cluster has been copied, write the data directly. If not,
+ * read the original data and write it, with the modification applied,
+ * to a newly allocated cluster.
+ */
+static ssize_t qcow_write_cluster(struct qcow *q, u64 offset,
+               void *buf, u32 src_len)
+{
+       struct qcow_header *header = q->header;
+       struct qcow_l2_table *l2t;
+       u64 clust_start;
+       u64 clust_flags;
+       u64 clust_off;
+       u64 l2t_idx;
+       u64 len;
+
+       l2t = NULL;
+
+       clust_off = get_cluster_offset(q, offset);
+       if (clust_off >= q->cluster_size)
+               return -1;
+
+       len = q->cluster_size - clust_off;
+       if (len > src_len)
+               len = src_len;
+
+       mutex_lock(&q->mutex);
+
+       if (get_cluster_table(q, offset, &l2t, &l2t_idx)) {
+               pr_warning("failed to get l2 table");
                goto error;
        }
 
-       clust_start &= QCOW2_OFFSET_MASK;
-       if (!clust_start) {
-               clust_start             = ALIGN(f_sz, clust_sz);
-               l2t->table[l2t_idx]     = cpu_to_be64(clust_start | QCOW2_OFLAG_COPIED);
-               l2t->dirty              = 1;
-       }
+       clust_start = be64_to_cpu(l2t->table[l2t_idx]);
+       clust_flags = clust_start & QCOW2_OFLAGS_MASK;
 
+       clust_start &= QCOW2_OFFSET_MASK;
        if (!(clust_flags & QCOW2_OFLAG_COPIED)) {
-               struct qcow_refcount_block *rfb = NULL;
-               u16 clust_refcount;
-               u64 clust_idx;
-               u64 rfb_idx;
+               u64 clust_new_idx;
+               u64 clust_new_start;
 
-               clust_idx = (clust_start & QCOW2_OFFSET_MASK)
-                       >> (header->cluster_bits);
-
-               rfb = qcow_read_refcount_block(q, clust_idx);
-               if (!rfb) {
-                       pr_warning("L1: error while reading refcount table");
+               clust_new_start = qcow_alloc_clusters(q, q->cluster_size);
+               if (clust_new_start == (u64)-1) {
+                       pr_warning("cluster allocation error");
                        goto error;
                }
 
-               rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
-               if (rfb_idx >= rfb->size) {
-                       pr_warning("L1: refcount block index out of bounds");
+               clust_new_idx = clust_new_start >> header->cluster_bits;
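+               /* Align the guest offset down to its cluster boundary */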
+               offset &= ~(q->cluster_size - 1);
+
+               /* If clust_start is not zero, read the original data */
+               if (clust_start) {
+                       mutex_unlock(&q->mutex);
+                       qcow2_read_cluster(q, offset, q->copy_buff,
+                               q->cluster_size);
+                       mutex_lock(&q->mutex);
+               } else {
+                       memset(q->copy_buff, 0x00, q->cluster_size);
+               }
+
+               memcpy(q->copy_buff + clust_off, buf, len);
+
+               /* Write actual data */
+               if (pwrite_in_full(q->fd, q->copy_buff, q->cluster_size,
+                       clust_new_start) < 0)
                        goto error;
-               }
 
-               clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]);
-               if (!clust_refcount) {
-                       clust_refcount = 1;
-                       rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount);
-                       rfb->dirty = 1;
+               /* Update the l2 table */
+               l2t->table[l2t_idx] = cpu_to_be64(clust_new_start
+                       | QCOW2_OFLAG_COPIED);
+               l2t->dirty = 1;
+               qcow_l2_cache_write(q, l2t);
+
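+               /*
+                * For a compressed cluster the L2 entry carries the host
+                * offset in its low bits; recover it and round down to a
+                * 512-byte sector before dropping the old reference.
+                */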
+               if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
+                       clust_start &= q->cluster_offset_mask;
+                       clust_start &= ~511;
                }
 
-               if (clust_refcount > 1) {
-                       pr_warning("L1 copy-on-write clusters are not supported");
+               /*
+                * Update the reference counts: drop the refcount of the
+                * old cluster and bump that of the new cluster.
+                */
+               if (clust_start)
+                       update_cluster_refcount(q,
+                               (clust_start >> header->cluster_bits), -1);
+
+               update_cluster_refcount(q, clust_new_idx, 1);
+       } else {
+               /* Write actual data */
+               if (pwrite_in_full(q->fd, buf, len,
+                       clust_start + clust_off) < 0)
                        goto error;
-               }
        }
-
        mutex_unlock(&q->mutex);
-
-       /* Write actual data */
-       if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
-               return -1;
-
        return len;
 
-free_cache:
-       free(l2t);
 error:
        mutex_unlock(&q->mutex);
        return -1;
@@ -993,6 +1092,7 @@ static int qcow_disk_close(struct disk_image *disk)
 
        refcount_table_free_cache(&q->refcount_table);
        l1_table_free_cache(&q->table);
+       free(q->copy_buff);
        free(q->cluster_data);
        free(q->cluster_cache);
        free(q->refcount_table.rf_table);
@@ -1117,10 +1217,16 @@ static struct disk_image *qcow2_probe(int fd, bool readonly)
        q->cluster_offset_mask = (1LL << q->csize_shift) - 1;
        q->cluster_size = 1 << q->header->cluster_bits;
 
+       q->copy_buff = malloc(q->cluster_size);
+       if (!q->copy_buff) {
+               pr_warning("copy buff malloc error!");
+               goto free_header;
+       }
+
        q->cluster_data = malloc(q->cluster_size);
        if (!q->cluster_data) {
                pr_warning("cluster data malloc error!");
-               goto free_header;
+               goto free_copy_buff;
        }
 
        q->cluster_cache = malloc(q->cluster_size);
@@ -1163,6 +1269,9 @@ free_cluster_cache:
 free_cluster_data:
        if (q->cluster_data)
                free(q->cluster_data);
+free_copy_buff:
+       if (q->copy_buff)
+               free(q->copy_buff);
 free_header:
        if (q->header)
                free(q->header);
@@ -1252,6 +1361,7 @@ static struct disk_image *qcow1_probe(int fd, bool readonly)
        q->version = QCOW1_VERSION;
        q->cluster_size = 1 << q->header->cluster_bits;
        q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1;
+       q->free_clust_idx = 0;
 
        q->cluster_data = malloc(q->cluster_size);
        if (!q->cluster_data) {
diff --git a/tools/kvm/include/kvm/qcow.h b/tools/kvm/include/kvm/qcow.h
index bbf7913..e032a1e 100644
--- a/tools/kvm/include/kvm/qcow.h
+++ b/tools/kvm/include/kvm/qcow.h
@@ -84,8 +84,10 @@ struct qcow {
        u32                             version;
        u64                             cluster_size;
        u64                             cluster_offset_mask;
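+       /* scan hint: lowest cluster index that may still be free */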
+       u64                             free_clust_idx;
        void                            *cluster_cache;
        void                            *cluster_data;
+       void                            *copy_buff;
 };
 
 struct qcow1_header_disk {
-- 
1.7.6.rc2.8.g28eb
