Signed-off-by: Kevin Wolf <kw...@redhat.com> --- block/vmdk.c | 209 ++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 135 insertions(+), 74 deletions(-)
diff --git a/block/vmdk.c b/block/vmdk.c index d9b9b89..f177ffd 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1016,27 +1016,26 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp) */ static int get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent, - uint64_t cluster_sector_num, - uint64_t sector_num, - uint64_t skip_start_sector, - uint64_t skip_end_sector) + uint64_t cluster_offset, + uint64_t offset, + uint64_t skip_start_bytes, + uint64_t skip_end_bytes) { int ret = VMDK_OK; int64_t cluster_bytes; uint8_t *whole_grain; /* For COW, align request sector_num to cluster start */ - sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors); cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS; + offset = QEMU_ALIGN_DOWN(offset, cluster_bytes); whole_grain = qemu_blockalign(bs, cluster_bytes); if (!bs->backing) { - memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS); - memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0, - cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS)); + memset(whole_grain, 0, skip_start_bytes); + memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes); } - assert(skip_end_sector <= extent->cluster_sectors); + assert(skip_end_bytes <= cluster_bytes); /* we will be here if it's first write on non-exist grain(cluster). * try to read from parent image, if exist */ if (bs->backing && !vmdk_is_cid_valid(bs)) { @@ -1045,42 +1044,43 @@ static int get_whole_cluster(BlockDriverState *bs, } /* Read backing data before skip range */ - if (skip_start_sector > 0) { + if (skip_start_bytes > 0) { if (bs->backing) { - ret = bdrv_read(bs->backing->bs, sector_num, - whole_grain, skip_start_sector); + ret = bdrv_pread(bs->backing->bs, offset, whole_grain, + skip_start_bytes); if (ret < 0) { ret = VMDK_ERROR; goto exit; } } - ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain, - skip_start_sector); + ret = bdrv_pwrite(extent->file->bs, cluster_offset, whole_grain, + skip_start_bytes); if (ret < 0) { ret = VMDK_ERROR; goto exit; } } /* Read backing data after skip range */ - if (skip_end_sector < extent->cluster_sectors) { + if (skip_end_bytes < cluster_bytes) { if (bs->backing) { - ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector, - whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), - extent->cluster_sectors - skip_end_sector); + ret = bdrv_pread(bs->backing->bs, offset + skip_end_bytes, + whole_grain + skip_end_bytes, + cluster_bytes - skip_end_bytes); if (ret < 0) { ret = VMDK_ERROR; goto exit; } } - ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector, - whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), - extent->cluster_sectors - skip_end_sector); + ret = bdrv_pwrite(extent->file->bs, cluster_offset + skip_end_bytes, + whole_grain + skip_end_bytes, + cluster_bytes - skip_end_bytes); if (ret < 0) { ret = VMDK_ERROR; goto exit; } } + ret = VMDK_OK; exit: qemu_vfree(whole_grain); return ret; @@ -1142,8 +1142,8 @@ static int get_cluster_offset(BlockDriverState *bs, uint64_t offset, bool allocate, uint64_t *cluster_offset, - uint64_t skip_start_sector, - uint64_t skip_end_sector) + uint64_t skip_start_bytes, + uint64_t skip_end_bytes) { unsigned int l1_index, l2_offset, l2_index; int min_index, i, j; @@ -1230,10 +1230,8 @@ static int get_cluster_offset(BlockDriverState *bs, * This problem may occur because of insufficient space on host disk * or inappropriate VM shutdown. */ - ret = get_whole_cluster(bs, extent, - cluster_sector, - offset >> BDRV_SECTOR_BITS, - skip_start_sector, skip_end_sector); + ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE, + offset, skip_start_bytes, skip_end_bytes); if (ret) { return ret; } @@ -1330,38 +1328,57 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, } static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, - int64_t offset_in_cluster, const uint8_t *buf, - int nb_sectors, int64_t sector_num) + int64_t offset_in_cluster, QEMUIOVector *qiov, + uint64_t qiov_offset, uint64_t n_bytes, + uint64_t offset) { int ret; VmdkGrainMarker *data = NULL; uLongf buf_len; - const uint8_t *write_buf = buf; - int write_len = nb_sectors * 512; + QEMUIOVector local_qiov; + struct iovec iov; int64_t write_offset; int64_t write_end_sector; if (extent->compressed) { + void *compressed_data; + if (!extent->has_marker) { ret = -EINVAL; goto out; } buf_len = (extent->cluster_sectors << 9) * 2; data = g_malloc(buf_len + sizeof(VmdkGrainMarker)); - if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK || - buf_len == 0) { + + compressed_data = g_malloc(n_bytes); + qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes); + ret = compress(data->data, &buf_len, compressed_data, n_bytes); + free(compressed_data); + + if (ret != Z_OK || buf_len == 0) { ret = -EINVAL; goto out; } - data->lba = sector_num; + + data->lba = offset >> BDRV_SECTOR_BITS; data->size = buf_len; - write_buf = (uint8_t *)data; - write_len = buf_len + sizeof(VmdkGrainMarker); + + n_bytes = buf_len + sizeof(VmdkGrainMarker); + iov = (struct iovec) { + .iov_base = data, + .iov_len = n_bytes, + }; + qemu_iovec_init_external(&local_qiov, &iov, 1); + } else { + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes); } + write_offset = cluster_offset + offset_in_cluster, - ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len); + ret = bdrv_co_pwritev(extent->file->bs, write_offset, n_bytes, + &local_qiov, 0); - write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE); + write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE); if (extent->compressed) { extent->next_cluster_sector = write_end_sector; @@ -1370,13 +1387,15 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, write_end_sector); } - if (ret != write_len) { - ret = ret < 0 ? ret : -EIO; + if (ret < 0) { goto out; } ret = 0; out: g_free(data); + if (!extent->compressed) { + qemu_iovec_destroy(&local_qiov); + } return ret; } @@ -1521,38 +1540,38 @@ vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, * * Returns: error code with 0 for success. */ -static int vmdk_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors, - bool zeroed, bool zero_dry_run) +static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + bool zeroed, bool zero_dry_run) { BDRVVmdkState *s = bs->opaque; VmdkExtent *extent = NULL; int ret; - int64_t index_in_cluster, n; + int64_t offset_in_cluster, n_bytes; uint64_t cluster_offset; + uint64_t bytes_done = 0; VmdkMetaData m_data; - if (sector_num > bs->total_sectors) { - error_report("Wrong offset: sector_num=0x%" PRIx64 + if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) { + error_report("Wrong offset: offset=0x%" PRIx64 " total_sectors=0x%" PRIx64, - sector_num, bs->total_sectors); + offset, bs->total_sectors); return -EIO; } - while (nb_sectors > 0) { - extent = find_extent(s, sector_num, extent); + while (bytes > 0) { + extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent); if (!extent) { return -EIO; } - index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); - n = extent->cluster_sectors - index_in_cluster; - if (n > nb_sectors) { - n = nb_sectors; - } - ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9, + offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset); + n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE + - offset_in_cluster); + + ret = get_cluster_offset(bs, extent, &m_data, offset, !(extent->compressed || zeroed), - &cluster_offset, - index_in_cluster, index_in_cluster + n); + &cluster_offset, offset_in_cluster, + offset_in_cluster + n_bytes); if (extent->compressed) { if (ret == VMDK_OK) { /* Refuse write to allocated cluster for streamOptimized */ @@ -1561,7 +1580,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, return -EIO; } else { /* allocate */ - ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9, + ret = get_cluster_offset(bs, extent, &m_data, offset, true, &cluster_offset, 0, 0); } } @@ -1571,9 +1590,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, if (zeroed) { /* Do zeroed write, buf is ignored */ if (extent->has_zero_grain && - index_in_cluster == 0 && - n >= extent->cluster_sectors) { - n = extent->cluster_sectors; + offset_in_cluster == 0 && + n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) { + n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE; if (!zero_dry_run) { /* update L2 tables */ if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED) @@ -1585,9 +1604,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, return -ENOTSUP; } } else { - ret = vmdk_write_extent(extent, - cluster_offset, index_in_cluster * 512, - buf, n, sector_num); + ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster, + qiov, bytes_done, n_bytes, offset); if (ret) { return ret; } @@ -1600,9 +1618,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, } } } - nb_sectors -= n; - sector_num += n; - buf += n * 512; + bytes -= n_bytes; + offset += n_bytes; + bytes_done += n_bytes; /* update CID on the first write every time the virtual disk is * opened */ @@ -1617,25 +1635,65 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, return 0; } -static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) +static int coroutine_fn +vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { int ret; BDRVVmdkState *s = bs->opaque; qemu_co_mutex_lock(&s->lock); - ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false); + ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false); qemu_co_mutex_unlock(&s->lock); return ret; } +typedef struct VmdkWriteCompressedCo { + BlockDriverState *bs; + int64_t sector_num; + const uint8_t *buf; + int nb_sectors; + int ret; +} VmdkWriteCompressedCo; + +static void vmdk_co_write_compressed(void *opaque) +{ + VmdkWriteCompressedCo *co = opaque; + QEMUIOVector local_qiov; + uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE; + uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE; + + struct iovec iov = (struct iovec) { + .iov_base = (uint8_t*) co->buf, + .iov_len = bytes, + }; + qemu_iovec_init_external(&local_qiov, &iov, 1); + + co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false); +} + static int vmdk_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { BDRVVmdkState *s = bs->opaque; + if (s->num_extents == 1 && s->extents[0].compressed) { - return vmdk_write(bs, sector_num, buf, nb_sectors, false, false); + Coroutine *co; + AioContext *aio_context = bdrv_get_aio_context(bs); + VmdkWriteCompressedCo data = { + .bs = bs, + .sector_num = sector_num, + .buf = buf, + .nb_sectors = nb_sectors, + .ret = -EINPROGRESS, + }; + co = qemu_coroutine_create(vmdk_co_write_compressed); + qemu_coroutine_enter(co, &data); + while (data.ret == -EINPROGRESS) { + aio_poll(aio_context, true); + } + return data.ret; } else { return -ENOTSUP; } @@ -1648,12 +1706,15 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, { int ret; BDRVVmdkState *s = bs->opaque; + uint64_t offset = sector_num * BDRV_SECTOR_SIZE; + uint64_t bytes = nb_sectors * BDRV_SECTOR_SIZE; + qemu_co_mutex_lock(&s->lock); /* write zeroes could fail if sectors not aligned to cluster, test it with * dry_run == true before really updating image */ - ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true); + ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true); if (!ret) { - ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false); + ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false); } qemu_co_mutex_unlock(&s->lock); return ret; @@ -2337,7 +2398,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_check = vmdk_check, .bdrv_reopen_prepare = vmdk_reopen_prepare, .bdrv_co_preadv = vmdk_co_preadv, - .bdrv_write = vmdk_co_write, + .bdrv_co_pwritev = vmdk_co_pwritev, .bdrv_write_compressed = vmdk_write_compressed, .bdrv_co_write_zeroes = vmdk_co_write_zeroes, .bdrv_close = vmdk_close, -- 1.8.3.1