Re: [Qemu-block] [PATCH v4 2/6] replication: add shared-disk and shared-disk-id options
On 04/12/2017 10:05 PM, zhanghailiang wrote: We use these two options to identify which disk is shared Signed-off-by: zhanghailiangSigned-off-by: Wen Congyang Signed-off-by: Zhang Chen --- v4: - Add proper comment for primary_disk (Stefan) v2: - Move g_free(s->shared_disk_id) to the common fail process place (Stefan) - Fix comments for these two options --- block/replication.c | 43 +-- qapi/block-core.json | 10 +- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/block/replication.c b/block/replication.c index bf3c395..418b81b 100644 --- a/block/replication.c +++ b/block/replication.c @@ -25,9 +25,12 @@ typedef struct BDRVReplicationState { ReplicationMode mode; int replication_state; +bool is_shared_disk; +char *shared_disk_id; BdrvChild *active_disk; BdrvChild *hidden_disk; BdrvChild *secondary_disk; +BdrvChild *primary_disk; char *top_id; ReplicationState *rs; Error *blocker; @@ -53,6 +56,9 @@ static void replication_stop(ReplicationState *rs, bool failover, #define REPLICATION_MODE"mode" #define REPLICATION_TOP_ID "top-id" +#define REPLICATION_SHARED_DISK "shared-disk" +#define REPLICATION_SHARED_DISK_ID "shared-disk-id" + static QemuOptsList replication_runtime_opts = { .name = "replication", .head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head), @@ -65,6 +71,14 @@ static QemuOptsList replication_runtime_opts = { .name = REPLICATION_TOP_ID, .type = QEMU_OPT_STRING, }, +{ +.name = REPLICATION_SHARED_DISK_ID, +.type = QEMU_OPT_STRING, +}, +{ +.name = REPLICATION_SHARED_DISK, +.type = QEMU_OPT_BOOL, +}, { /* end of list */ } }, }; @@ -85,6 +99,9 @@ static int replication_open(BlockDriverState *bs, QDict *options, QemuOpts *opts = NULL; const char *mode; const char *top_id; +const char *shared_disk_id; +BlockBackend *blk; +BlockDriverState *tmp_bs; bs->file = bdrv_open_child(NULL, options, "file", bs, _file, false, errp); @@ -125,12 +142,33 @@ static int replication_open(BlockDriverState *bs, QDict *options, "The option mode's value should be primary or secondary"); goto fail; } +s->is_shared_disk = qemu_opt_get_bool(opts, REPLICATION_SHARED_DISK, + What If secondary side is supplied with 'REPLICATION_SHARED_DISK_ID'? Pls refer f4f2539bc to pefect the logical. false); +if (s->is_shared_disk && (s->mode == REPLICATION_MODE_PRIMARY)) { +shared_disk_id = qemu_opt_get(opts, REPLICATION_SHARED_DISK_ID); +if (!shared_disk_id) { +error_setg(_err, "Missing shared disk blk option"); +goto fail; +} +s->shared_disk_id = g_strdup(shared_disk_id); +blk = blk_by_name(s->shared_disk_id); +if (!blk) { +error_setg(_err, "There is no %s block", s->shared_disk_id); +goto fail; +} +/* We have a BlockBackend for the primary disk but use BdrvChild for + * consistency - active_disk, secondary_disk, etc are also BdrvChild. + */ +tmp_bs = blk_bs(blk); +s->primary_disk = QLIST_FIRST(_bs->parents); +} s->rs = replication_new(bs, _ops); -ret = 0; - +qemu_opts_del(opts); +return 0; fail: +g_free(s->shared_disk_id); qemu_opts_del(opts); error_propagate(errp, local_err); @@ -141,6 +179,7 @@ static void replication_close(BlockDriverState *bs) { BDRVReplicationState *s = bs->opaque; +g_free(s->shared_disk_id); if (s->replication_state == BLOCK_REPLICATION_RUNNING) { replication_stop(s->rs, false, NULL); } diff --git a/qapi/block-core.json b/qapi/block-core.json index 033457c..361c932 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2661,12 +2661,20 @@ # node who owns the replication node chain. Must not be given in # primary mode. # +# @shared-disk-id: Id of shared disk while is replication mode, if @shared-disk +# is true, this option is required (Since: 2.10) +# Further explanations: For @shared-disk-id, it must/only be given when @shared-disk enable on Primary side. +# @shared-disk: To indicate whether or not a disk is shared by primary VM +# and secondary VM. (The default is false) (Since: 2.10) +# Further explanations: For @shared-disk, it must be given or not-given on both side at the same time. # Since: 2.9 ## { 'struct': 'BlockdevOptionsReplication', 'base': 'BlockdevOptionsGenericFormat', 'data': { 'mode': 'ReplicationMode', -
Re: [Qemu-block] [Qemu-devel] [PATCH 26/31] vdi: Avoid bitrot of debugging code
Am 18.04.2017 um 03:33 schrieb Eric Blake: Rework the debug define so that we always get -Wformat checking, even when debugging is disabled. Signed-off-by: Eric Blake--- Reviewed-by: Stefan Weil block/vdi.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/block/vdi.c b/block/vdi.c index d12d9cd..a70b969 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -86,12 +86,18 @@ #define DEFAULT_CLUSTER_SIZE (1 * MiB) #if defined(CONFIG_VDI_DEBUG) -#define logout(fmt, ...) \ -fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__) +#define VDI_DEBUG 1 #else -#define logout(fmt, ...) ((void)0) +#define VDI_DEBUG 0 #endif +#define logout(fmt, ...) \ +do {\ +if (VDI_DEBUG) {\ +fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__); \ +} \ +} while (0) + /* Image signature. */ #define VDI_SIGNATURE 0xbeda107f
[Qemu-block] [PATCH] MAINTAINERS: update Wen's email address
So he can get CC'ed on future patches and bugs for this feature Signed-off-by: Changlong Xie--- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c60235e..5638992 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1817,7 +1817,7 @@ S: Supported F: tests/image-fuzzer/ Replication -M: Wen Congyang +M: Wen Congyang M: Changlong Xie S: Supported F: replication* -- 1.9.3
Re: [Qemu-block] [Qemu-devel] [PULL 2/8] replication: clarify permissions
On 04/18/2017 09:36 AM, Hailiang Zhang wrote: On 2017/4/18 9:23, Eric Blake wrote: On 03/17/2017 08:15 AM, Kevin Wolf wrote: From: Changlong XieEven if hidden_disk, secondary_disk are backing files, they all need write permissions in replication scenario. Otherwise we will encouter below exceptions on secondary side during adding nbd server: {'execute': 'nbd-server-add', 'arguments': {'device': 'colo-disk', 'writable': true } } {"error": {"class": "GenericError", "desc": "Conflicts with use by hidden-qcow2-driver as 'backing', which does not allow 'write' on sec-qcow2-driver-for-nbd"}} CC: Zhang Hailiang CC: Zhang Chen CC: Wen Congyang This address for Wen Congyang is different than the one listed in MAINTAINERS for replication (M: Wen Congyang ), and different still from addresses my mailer has harvested from other posts (wencongy...@gmail.com). The MAINTAINERS entry is now resulting in 'undelivered mail' bounce messages, can you please submit an update to MAINTAINERS with your new preferred address? [or gently correct me if I'm confusing two people with the same name?] No, the same people, he just left his job from fujitsu, the entry in MAINTAINERS file needs to be updated. Cc: Changlong Xie Hi Changlong, would you please send a patch to update it ? Hi all After a short talk with Wen, i would like to update his new email address now. Thanks -Xie Hailiang .
[Qemu-block] [PATCH 30/31] vvfat: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the vvfat driver accordingly. Signed-off-by: Eric Blake--- block/vvfat.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index bef2056..825fe72 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2961,13 +2961,13 @@ vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, return ret; } -static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file) +static int64_t coroutine_fn vvfat_co_block_status(BlockDriverState *bs, +int64_t offset, int64_t bytes, int64_t *n, BlockDriverState **file) { BDRVVVFATState* s = bs->opaque; -*n = s->sector_count - sector_num; -if (*n > nb_sectors) { -*n = nb_sectors; +*n = s->sector_count * BDRV_SECTOR_SIZE - offset; +if (*n > bytes) { +*n = bytes; } else if (*n < 0) { return 0; } @@ -3124,7 +3124,7 @@ static BlockDriver bdrv_vvfat = { .bdrv_co_preadv = vvfat_co_preadv, .bdrv_co_pwritev= vvfat_co_pwritev, -.bdrv_co_get_block_status = vvfat_co_get_block_status, +.bdrv_co_block_status = vvfat_co_block_status, }; static void bdrv_vvfat_init(void) -- 2.9.3
Re: [Qemu-block] [PATCH 00/31] make bdrv_get_block_status byte-based
On 04/17/2017 08:33 PM, Eric Blake wrote: > There are patches floating around to add NBD_CMD_BLOCK_STATUS, > but NBD wants to report status on byte granularity (even if the > reporting will probably be naturally aligned to sectors or even > much higher levels). I've therefore started the task of > converting our block status code to report at a byte granularity > rather than sectors. Apologies for botching Kevin's address in the mail; if you reply, you'll want to edit the to list if you don't want a bounce message about an undeliverable address ;( -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org signature.asc Description: OpenPGP digital signature
[Qemu-block] [PATCH 29/31] vpc: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the vpc driver accordingly. Signed-off-by: Eric Blake--- block/vpc.c | 31 +++ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index ecfee77..3cd56e7 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -689,46 +689,45 @@ fail: return ret; } -static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, -int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int64_t coroutine_fn vpc_co_block_status(BlockDriverState *bs, +int64_t offset, int64_t bytes, int64_t *pnum, BlockDriverState **file) { BDRVVPCState *s = bs->opaque; VHDFooter *footer = (VHDFooter*) s->footer_buf; -int64_t start, offset; +int64_t start, image_offset; bool allocated; int n; if (be32_to_cpu(footer->type) == VHD_FIXED) { -*pnum = nb_sectors; +*pnum = bytes; *file = bs->file->bs; return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | - (sector_num << BDRV_SECTOR_BITS); + (offset & BDRV_BLOCK_OFFSET_MASK); } -offset = get_sector_offset(bs, sector_num, 0); -start = offset; -allocated = (offset != -1); +image_offset = get_image_offset(bs, offset, 0); +start = image_offset & BDRV_BLOCK_OFFSET_MASK; +allocated = (image_offset != -1); *pnum = 0; do { /* All sectors in a block are contiguous (without using the bitmap) */ -n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE) - - sector_num; -n = MIN(n, nb_sectors); +n = ROUND_UP(offset + 1, s->block_size) - offset; +n = MIN(n, bytes); *pnum += n; -sector_num += n; -nb_sectors -= n; +offset += n; +bytes -= n; /* *pnum can't be greater than one block for allocated * sectors since there is always a bitmap in between. */ if (allocated) { *file = bs->file->bs; return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; } -if (nb_sectors == 0) { +if (bytes == 0) { break; } -offset = get_sector_offset(bs, sector_num, 0); +image_offset = get_sector_offset(bs, offset, 0); } while (offset == -1); return 0; @@ -1074,7 +1073,7 @@ static BlockDriver bdrv_vpc = { .bdrv_co_preadv = vpc_co_preadv, .bdrv_co_pwritev= vpc_co_pwritev, -.bdrv_co_get_block_status = vpc_co_get_block_status, +.bdrv_co_block_status = vpc_co_block_status, .bdrv_get_info = vpc_get_info, -- 2.9.3
[Qemu-block] [PATCH 28/31] vmdk: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the vmdk driver accordingly. Signed-off-by: Eric Blake--- block/vmdk.c | 24 ++-- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index c61b9cc..c85ce96 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1286,25 +1286,24 @@ static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent, return offset / BDRV_SECTOR_SIZE; } -static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, -int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int64_t coroutine_fn vmdk_co_block_status(BlockDriverState *bs, +int64_t offset, int64_t bytes, int64_t *pnum, BlockDriverState **file) { BDRVVmdkState *s = bs->opaque; int64_t index_in_cluster, n, ret; -uint64_t offset; +uint64_t cluster_offset; VmdkExtent *extent; -extent = find_extent(s, sector_num, NULL); +extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL); if (!extent) { return 0; } qemu_co_mutex_lock(>lock); -ret = get_cluster_offset(bs, extent, NULL, - sector_num * 512, false, , +ret = get_cluster_offset(bs, extent, NULL, offset, false, _offset, 0, 0); qemu_co_mutex_unlock(>lock); -index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); +index_in_cluster = vmdk_find_offset_in_cluster(extent, offset); switch (ret) { case VMDK_ERROR: ret = -EIO; @@ -1319,18 +1318,15 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, ret = BDRV_BLOCK_DATA; if (!extent->compressed) { ret |= BDRV_BLOCK_OFFSET_VALID; -ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS)) +ret |= (cluster_offset + index_in_cluster) & BDRV_BLOCK_OFFSET_MASK; } *file = extent->file->bs; break; } -n = extent->cluster_sectors - index_in_cluster; -if (n > nb_sectors) { -n = nb_sectors; -} -*pnum = n; +n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster; +*pnum = MIN(n, bytes); return ret; } @@ -2362,7 +2358,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_close = vmdk_close, .bdrv_create = vmdk_create, .bdrv_co_flush_to_disk= vmdk_co_flush, -.bdrv_co_get_block_status = vmdk_co_get_block_status, +.bdrv_co_block_status = vmdk_co_block_status, .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .bdrv_has_zero_init = vmdk_has_zero_init, .bdrv_get_specific_info = vmdk_get_specific_info, -- 2.9.3
[Qemu-block] [PATCH 26/31] vdi: Avoid bitrot of debugging code
Rework the debug define so that we always get -Wformat checking, even when debugging is disabled. Signed-off-by: Eric Blake--- block/vdi.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/block/vdi.c b/block/vdi.c index d12d9cd..a70b969 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -86,12 +86,18 @@ #define DEFAULT_CLUSTER_SIZE (1 * MiB) #if defined(CONFIG_VDI_DEBUG) -#define logout(fmt, ...) \ -fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__) +#define VDI_DEBUG 1 #else -#define logout(fmt, ...) ((void)0) +#define VDI_DEBUG 0 #endif +#define logout(fmt, ...) \ +do {\ +if (VDI_DEBUG) {\ +fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__); \ +} \ +} while (0) + /* Image signature. */ #define VDI_SIGNATURE 0xbeda107f -- 2.9.3
[Qemu-block] [PATCH 24/31] raw: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the raw driver accordingly. Signed-off-by: Eric Blake--- block/raw-format.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/block/raw-format.c b/block/raw-format.c index 36e6503..746beed 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -250,17 +250,17 @@ fail: return ret; } -static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, -int64_t sector_num, -int nb_sectors, int *pnum, -BlockDriverState **file) +static int64_t coroutine_fn raw_co_block_status(BlockDriverState *bs, +int64_t offset, +int64_t bytes, int64_t *pnum, +BlockDriverState **file) { BDRVRawState *s = bs->opaque; -*pnum = nb_sectors; +*pnum = bytes; *file = bs->file->bs; -sector_num += s->offset / BDRV_SECTOR_SIZE; +offset += s->offset; return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | - (sector_num << BDRV_SECTOR_BITS); +(offset & BDRV_BLOCK_OFFSET_MASK); } static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, @@ -475,7 +475,7 @@ BlockDriver bdrv_raw = { .bdrv_co_pwritev = _co_pwritev, .bdrv_co_pwrite_zeroes = _co_pwrite_zeroes, .bdrv_co_pdiscard = _co_pdiscard, -.bdrv_co_get_block_status = _co_get_block_status, +.bdrv_co_block_status = _co_block_status, .bdrv_truncate= _truncate, .bdrv_getlength = _getlength, .has_variable_length = true, -- 2.9.3
Re: [Qemu-block] [Qemu-devel] [PULL 2/8] replication: clarify permissions
On 2017/4/18 9:23, Eric Blake wrote: On 03/17/2017 08:15 AM, Kevin Wolf wrote: From: Changlong XieEven if hidden_disk, secondary_disk are backing files, they all need write permissions in replication scenario. Otherwise we will encouter below exceptions on secondary side during adding nbd server: {'execute': 'nbd-server-add', 'arguments': {'device': 'colo-disk', 'writable': true } } {"error": {"class": "GenericError", "desc": "Conflicts with use by hidden-qcow2-driver as 'backing', which does not allow 'write' on sec-qcow2-driver-for-nbd"}} CC: Zhang Hailiang CC: Zhang Chen CC: Wen Congyang This address for Wen Congyang is different than the one listed in MAINTAINERS for replication (M: Wen Congyang ), and different still from addresses my mailer has harvested from other posts (wencongy...@gmail.com). The MAINTAINERS entry is now resulting in 'undelivered mail' bounce messages, can you please submit an update to MAINTAINERS with your new preferred address? [or gently correct me if I'm confusing two people with the same name?] No, the same people, he just left his job from fujitsu, the entry in MAINTAINERS file needs to be updated. Cc: Changlong Xie Hi Changlong, would you please send a patch to update it ? Hailiang
[Qemu-block] [PATCH 25/31] sheepdog: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the sheepdog driver accordingly. Signed-off-by: Eric Blake--- block/sheepdog.c | 23 +++ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 1ccb81b..be7cc39 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2971,18 +2971,17 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, } static coroutine_fn int64_t -sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - int *pnum, BlockDriverState **file) +sd_co_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum, BlockDriverState **file) { BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = >inode; uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); -uint64_t offset = sector_num * BDRV_SECTOR_SIZE; unsigned long start = offset / object_size, - end = DIV_ROUND_UP((sector_num + nb_sectors) * - BDRV_SECTOR_SIZE, object_size); + end = DIV_ROUND_UP(offset + bytes, object_size); unsigned long idx; -int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; +int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | +(offset & BDRV_BLOCK_OFFSET_MASK); for (idx = start; idx < end; idx++) { if (inode->data_vdi_id[idx] == 0) { @@ -2999,9 +2998,9 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, } } -*pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE; -if (*pnum > nb_sectors) { -*pnum = nb_sectors; +*pnum = (idx - start) * object_size; +if (*pnum > bytes) { +*pnum = bytes; } if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) { *file = bs; @@ -3079,7 +3078,7 @@ static BlockDriver bdrv_sheepdog = { .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, .bdrv_co_pdiscard = sd_co_pdiscard, -.bdrv_co_get_block_status = sd_co_get_block_status, +.bdrv_co_block_status = sd_co_block_status, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, @@ -3115,7 +3114,7 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, .bdrv_co_pdiscard = sd_co_pdiscard, -.bdrv_co_get_block_status = sd_co_get_block_status, +.bdrv_co_block_status = sd_co_block_status, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, @@ -3151,7 +3150,7 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, .bdrv_co_pdiscard = sd_co_pdiscard, -.bdrv_co_get_block_status = sd_co_get_block_status, +.bdrv_co_block_status = sd_co_block_status, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, -- 2.9.3
[Qemu-block] [PATCH 23/31] qed: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the qed driver accordingly. Signed-off-by: Eric Blake--- block/qed.c | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/block/qed.c b/block/qed.c index fd76817..336dae4 100644 --- a/block/qed.c +++ b/block/qed.c @@ -729,7 +729,7 @@ typedef struct { Coroutine *co; uint64_t pos; int64_t status; -int *pnum; +int64_t *pnum; BlockDriverState **file; } QEDIsAllocatedCB; @@ -737,10 +737,10 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l { QEDIsAllocatedCB *cb = opaque; BDRVQEDState *s = cb->bs->opaque; -*cb->pnum = len / BDRV_SECTOR_SIZE; +*cb->pnum = len; switch (ret) { case QED_CLUSTER_FOUND: -offset |= qed_offset_into_cluster(s, cb->pos); +offset |= qed_offset_into_cluster(s, cb->pos) & BDRV_BLOCK_OFFSET_VALID; cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; *cb->file = cb->bs->file->bs; break; @@ -762,23 +762,23 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l } } -static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file) +static int64_t coroutine_fn bdrv_qed_co_block_status(BlockDriverState *bs, + int64_t offset, + int64_t bytes, + int64_t *pnum, + BlockDriverState **file) { BDRVQEDState *s = bs->opaque; -size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE; QEDIsAllocatedCB cb = { .bs = bs, -.pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE, +.pos = offset, .status = BDRV_BLOCK_OFFSET_MASK, .pnum = pnum, .file = file, }; QEDRequest request = { .l2_table = NULL }; -qed_find_cluster(s, , cb.pos, len, qed_is_allocated_cb, ); +qed_find_cluster(s, , cb.pos, bytes, qed_is_allocated_cb, ); /* Now sleep if the callback wasn't invoked immediately */ while (cb.status == BDRV_BLOCK_OFFSET_MASK) { @@ -1710,7 +1710,7 @@ static BlockDriver bdrv_qed = { .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = bdrv_qed_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, -.bdrv_co_get_block_status = bdrv_qed_co_get_block_status, +.bdrv_co_block_status = bdrv_qed_co_block_status, .bdrv_aio_readv = bdrv_qed_aio_readv, .bdrv_aio_writev = bdrv_qed_aio_writev, .bdrv_co_pwrite_zeroes= bdrv_qed_co_pwrite_zeroes, -- 2.9.3
[Qemu-block] [PATCH 22/31] qcow2: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the qcow2 driver accordingly. Signed-off-by: Eric Blake--- block/qcow2.c | 17 - 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 4272cca..0de7210 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1358,8 +1358,8 @@ static void qcow2_join_options(QDict *options, QDict *old_options) } } -static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, -int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int64_t coroutine_fn qcow2_co_block_status(BlockDriverState *bs, +int64_t offset, int64_t count, int64_t *pnum, BlockDriverState **file) { BDRVQcow2State *s = bs->opaque; uint64_t cluster_offset; @@ -1367,21 +1367,20 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, unsigned int bytes; int64_t status = 0; -bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE); +bytes = MIN(INT_MAX, count); qemu_co_mutex_lock(>lock); -ret = qcow2_get_cluster_offset(bs, sector_num << 9, , - _offset); +ret = qcow2_get_cluster_offset(bs, offset, , _offset); qemu_co_mutex_unlock(>lock); if (ret < 0) { return ret; } -*pnum = bytes >> BDRV_SECTOR_BITS; +*pnum = bytes; if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && !s->cipher) { -index_in_cluster = sector_num & (s->cluster_sectors - 1); -cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); +index_in_cluster = offset & (s->cluster_size - 1); +cluster_offset |= (index_in_cluster & BDRV_BLOCK_OFFSET_MASK); *file = bs->file->bs; status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; } @@ -3429,7 +3428,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create= qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, -.bdrv_co_get_block_status = qcow2_co_get_block_status, +.bdrv_co_block_status = qcow2_co_block_status, .bdrv_set_key = qcow2_set_key, .bdrv_co_preadv = qcow2_co_preadv, -- 2.9.3
[Qemu-block] [PATCH 11/31] block: Add .bdrv_co_block_status() callback
We are gradually moving away from sector-based interfaces, towards byte-based. Now that the block layer exposes byte-based allocation, it's time to tackle the drivers. Add a new callback that operates on as small as byte boundaries, and update the block layer to ensure that the callback is only used with inputs aligned to the device's request_alignment. Subsequent patches will then update individual drivers, and then finally remove .bdrv_co_get_block_status(). Signed-off-by: Eric Blake--- include/block/block_int.h | 12 block/io.c| 47 +++ 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/include/block/block_int.h b/include/block/block_int.h index bc3a28a..8f20bc3 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -163,11 +163,23 @@ struct BlockDriver { */ int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, int64_t offset, int count, BdrvRequestFlags flags); + int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, int64_t offset, int count); + +/* + * Building block for bdrv_block_status[_above]. The block layer + * guarantees input aligned to request_alignment, as well as + * non-NULL pnum and file; and the result only has to worry about + * BDRV_BLOCK_DATA, _ZERO, _OFFSET_VALID, and _RAW, and only + * according to the current BDS. + */ int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file); +int64_t coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bd, +int64_t offset, int64_t bytes, int64_t *pnum, +BlockDriverState **file); /* * Invalidate any cached meta-data. diff --git a/block/io.c b/block/io.c index 1b101cf..361eeb8 100644 --- a/block/io.c +++ b/block/io.c @@ -1718,7 +1718,6 @@ static int64_t coroutine_fn bdrv_co_block_status(BlockDriverState *bs, int64_t total_size; int64_t n; /* bytes */ int64_t ret, ret2; -int count; /* sectors */ BlockDriverState *tmp_file; total_size = bdrv_getlength(bs); @@ -1739,7 +1738,7 @@ static int64_t coroutine_fn bdrv_co_block_status(BlockDriverState *bs, bytes = n; } -if (!bs->drv->bdrv_co_get_block_status) { +if (!bs->drv->bdrv_co_get_block_status && !bs->drv->bdrv_co_block_status) { *pnum = bytes; ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; if (bs->drv->protocol_name) { @@ -1753,20 +1752,44 @@ static int64_t coroutine_fn bdrv_co_block_status(BlockDriverState *bs, } *file = NULL; bdrv_inc_in_flight(bs); -/* TODO: Rather than require aligned offsets, we could instead - * round to the driver's request_alignment here, then touch up - * count afterwards back to the caller's expectations. But first - * we want to switch the driver callback to likewise be - * byte-based. */ -assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); -ret = bs->drv->bdrv_co_get_block_status(bs, offset >> BDRV_SECTOR_BITS, -bytes >> BDRV_SECTOR_BITS, , -file); +if (bs->drv->bdrv_co_get_block_status) { +int count; /* sectors */ + +assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); +ret = bs->drv->bdrv_co_get_block_status(bs, offset >> BDRV_SECTOR_BITS, +bytes >> BDRV_SECTOR_BITS, +, file); +*pnum = count * BDRV_SECTOR_SIZE; +} else { +/* Round out to request_alignment boundaries */ +int64_t aligned_offset, aligned_bytes; + +aligned_offset = QEMU_ALIGN_DOWN(offset, bs->bl.request_alignment); +aligned_bytes = ROUND_UP(offset + bytes, + bs->bl.request_alignment) - aligned_offset; +ret = bs->drv->bdrv_co_block_status(bs, aligned_offset, aligned_bytes, +, file); +/* Clamp pnum and ret to original request */ +if (aligned_offset != offset && ret >= 0) { +int sectors = DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) - +DIV_ROUND_UP(aligned_offset, BDRV_SECTOR_SIZE); + +assert(n >= offset - aligned_offset); +n -= offset - aligned_offset; +if (sectors) { +ret += sectors * BDRV_SECTOR_SIZE; +} +} +if (ret >= 0 && n > bytes) { +assert(aligned_bytes != bytes); +n = bytes; +} +*pnum = n; +} if (ret < 0) { *pnum = 0; goto out; } -*pnum = count * BDRV_SECTOR_SIZE; if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID); -- 2.9.3
[Qemu-block] [PATCH 21/31] qcow: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the qcow driver accordingly. Signed-off-by: Eric Blake--- block/qcow.c | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/block/qcow.c b/block/qcow.c index 5d147b9..d7dfa08 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -515,20 +515,22 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, return cluster_offset; } -static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, -int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int64_t coroutine_fn qcow_co_block_status(BlockDriverState *bs, +int64_t offset, int64_t bytes, int64_t *pnum, BlockDriverState **file) { BDRVQcowState *s = bs->opaque; -int index_in_cluster, n; +int index_in_cluster; +int64_t n; uint64_t cluster_offset; qemu_co_mutex_lock(>lock); -cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); +cluster_offset = get_cluster_offset(bs, offset, 0, 0, 0, 0); qemu_co_mutex_unlock(>lock); -index_in_cluster = sector_num & (s->cluster_sectors - 1); -n = s->cluster_sectors - index_in_cluster; -if (n > nb_sectors) -n = nb_sectors; +index_in_cluster = offset & (s->cluster_size - 1); +n = s->cluster_size - index_in_cluster; +if (n > bytes) { +n = bytes; +} *pnum = n; if (!cluster_offset) { return 0; @@ -536,7 +538,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->cipher) { return BDRV_BLOCK_DATA; } -cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); +cluster_offset |= (index_in_cluster & BDRV_BLOCK_OFFSET_MASK); *file = bs->file->bs; return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset; } @@ -1061,7 +1063,7 @@ static BlockDriver bdrv_qcow = { .bdrv_co_readv = qcow_co_readv, .bdrv_co_writev = qcow_co_writev, -.bdrv_co_get_block_status = qcow_co_get_block_status, +.bdrv_co_block_status = qcow_co_block_status, .bdrv_set_key = qcow_set_key, .bdrv_make_empty= qcow_make_empty, -- 2.9.3
[Qemu-block] [PATCH 14/31] gluster: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the gluster driver accordingly. Signed-off-by: Eric Blake--- block/gluster.c | 47 +++ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 1d4e2f7..3f252c6 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1332,24 +1332,24 @@ exit: /* * Returns the allocation status of the specified sectors. * - * If 'sector_num' is beyond the end of the disk image the return value is 0 + * If 'offset' is beyond the end of the disk image the return value is 0 * and 'pnum' is set to 0. * - * 'pnum' is set to the number of sectors (including and immediately following - * the specified sector) that are known to be in the same + * 'pnum' is set to the number of bytes (including and immediately following + * the specified offset) that are known to be in the same * allocated/unallocated state. * - * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes + * 'bytes' is the max value 'pnum' should be set to. If bytes goes * beyond the end of the disk image it will be clamped. * - * (Based on raw_co_get_block_status() from file-posix.c.) + * (Based on raw_co_block_status() from file-posix.c.) */ -static int64_t coroutine_fn qemu_gluster_co_get_block_status( -BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, +static int64_t coroutine_fn qemu_gluster_co_block_status( +BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, BlockDriverState **file) { BDRVGlusterState *s = bs->opaque; -off_t start, data = 0, hole = 0; +off_t data = 0, hole = 0; int64_t total_size; int ret = -EINVAL; @@ -1357,41 +1357,40 @@ static int64_t coroutine_fn qemu_gluster_co_get_block_status( return ret; } -start = sector_num * BDRV_SECTOR_SIZE; total_size = bdrv_getlength(bs); if (total_size < 0) { return total_size; -} else if (start >= total_size) { +} else if (offset >= total_size) { *pnum = 0; return 0; -} else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { -nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); +} else if (offset + bytes > total_size) { +bytes = total_size - offset; } -ret = find_allocation(bs, start, , ); +ret = find_allocation(bs, offset, , ); if (ret == -ENXIO) { /* Trailing hole */ -*pnum = nb_sectors; +*pnum = bytes; ret = BDRV_BLOCK_ZERO; } else if (ret < 0) { /* No info available, so pretend there are no holes */ -*pnum = nb_sectors; +*pnum = bytes; ret = BDRV_BLOCK_DATA; -} else if (data == start) { +} else if (data == offset) { /* On a data extent, compute sectors to the end of the extent, * possibly including a partial sector at EOF. */ -*pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); +*pnum = MIN(bytes, hole - offset); ret = BDRV_BLOCK_DATA; } else { /* On a hole, compute sectors to the beginning of the next extent. */ -assert(hole == start); -*pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); +assert(hole == offset); +*pnum = MIN(bytes, data - offset); ret = BDRV_BLOCK_ZERO; } *file = bs; -return ret | BDRV_BLOCK_OFFSET_VALID | start; +return ret | BDRV_BLOCK_OFFSET_VALID | (offset & BDRV_BLOCK_OFFSET_MASK); } @@ -1419,7 +1418,7 @@ static BlockDriver bdrv_gluster = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes= qemu_gluster_co_pwrite_zeroes, #endif -.bdrv_co_get_block_status = qemu_gluster_co_get_block_status, +.bdrv_co_block_status = qemu_gluster_co_block_status, .create_opts = _gluster_create_opts, }; @@ -1447,7 +1446,7 @@ static BlockDriver bdrv_gluster_tcp = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes= qemu_gluster_co_pwrite_zeroes, #endif -.bdrv_co_get_block_status = qemu_gluster_co_get_block_status, +.bdrv_co_block_status = qemu_gluster_co_block_status, .create_opts = _gluster_create_opts, }; @@ -1475,7 +1474,7 @@ static BlockDriver bdrv_gluster_unix = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes= qemu_gluster_co_pwrite_zeroes, #endif -.bdrv_co_get_block_status = qemu_gluster_co_get_block_status, +.bdrv_co_block_status = qemu_gluster_co_block_status, .create_opts = _gluster_create_opts, }; @@ -1509,7 +1508,7 @@ static BlockDriver bdrv_gluster_rdma = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes= qemu_gluster_co_pwrite_zeroes, #endif -.bdrv_co_get_block_status =
[Qemu-block] [PATCH 10/31] block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards byte-based. In the common case, allocation is unlikely to ever use values that are not naturally sector-aligned, but it is possible that byte-based values will let us be more precise about allocation at the end of an unaligned file that can do byte-based access. Changing the name of the function from bdrv_get_block_status_above() to bdrv_block_status_above() ensures that the compiler enforces that all callers are updated. For now, the io.c layer still assert()s that all callers are sector-aligned, but that can be relaxed when a later patch implements byte-based block status in the drivers. For the most part this patch is just the addition of scaling at the callers followed by inverse scaling at bdrv_block_status(). But some code, particularly bdrv_block_status(), gets a lot simpler because it no longer has to mess with sectors; also, it is now possible to pass NULL if the caller does not care how much of the image is allocated beyond the initial offset, or which BDS in the chain owns the sector. For ease of review, bdrv_get_block_status() was tackled separately. Signed-off-by: Eric Blake--- include/block/block.h | 10 +- block/io.c| 36 +++- block/mirror.c| 14 +- block/qcow2.c | 10 +++--- qemu-img.c| 37 + 5 files changed, 45 insertions(+), 62 deletions(-) diff --git a/include/block/block.h b/include/block/block.h index b9e7281..8f2b8a2 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -420,11 +420,11 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int64_t bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, BlockDriverState **file); -int64_t bdrv_get_block_status_above(BlockDriverState *bs, -BlockDriverState *base, -int64_t sector_num, -int nb_sectors, int *pnum, -BlockDriverState **file); +int64_t bdrv_block_status_above(BlockDriverState *bs, +BlockDriverState *base, +int64_t offset, +int64_t bytes, int64_t *pnum, +BlockDriverState **file); int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum); int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, diff --git a/block/io.c b/block/io.c index 10bc011..1b101cf 100644 --- a/block/io.c +++ b/block/io.c @@ -1842,7 +1842,7 @@ static int64_t coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, return ret; } -/* Coroutine wrapper for bdrv_get_block_status_above() */ +/* Coroutine wrapper for bdrv_block_status_above() */ static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque) { BdrvCoBlockStatusData *data = opaque; @@ -1858,21 +1858,19 @@ static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque) * * See bdrv_co_block_status_above() for details. */ -int64_t bdrv_get_block_status_above(BlockDriverState *bs, -BlockDriverState *base, -int64_t sector_num, -int nb_sectors, int *pnum, -BlockDriverState **file) +int64_t bdrv_block_status_above(BlockDriverState *bs, +BlockDriverState *base, +int64_t offset, int64_t bytes, int64_t *pnum, +BlockDriverState **file) { Coroutine *co; -int64_t n; BdrvCoBlockStatusData data = { .bs = bs, .base = base, .file = file, -.offset = sector_num * BDRV_SECTOR_SIZE, -.bytes = nb_sectors * BDRV_SECTOR_SIZE, -.pnum = , +.offset = offset, +.bytes = bytes, +.pnum = pnum, .done = false, }; @@ -1884,7 +1882,6 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, bdrv_coroutine_enter(bs, co); BDRV_POLL_WHILE(bs, !data.done); } -*pnum = n >> BDRV_SECTOR_BITS; return data.ret; } @@ -1892,27 +1889,16 @@ int64_t bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, BlockDriverState **file) { -int64_t ret; -int n; - -assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); -assert(bytes <= BDRV_REQUEST_MAX_BYTES); -ret = bdrv_get_block_status_above(bs, backing_bs(bs), - offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, , file); -
[Qemu-block] [PATCH 27/31] vdi: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the vdi driver accordingly. Note that the TODO is already covered (the block layer guarantees bounds of its requests), and that we can remove the now-unused s->block_sectors. Signed-off-by: Eric Blake--- block/vdi.c | 25 - 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/block/vdi.c b/block/vdi.c index a70b969..390e2f1 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -171,8 +171,6 @@ typedef struct { uint32_t *bmap; /* Size of block (bytes). */ uint32_t block_size; -/* Size of block (sectors). */ -uint32_t block_sectors; /* First sector of block map. */ uint32_t bmap_sector; /* VDI header (converted to host endianness). */ @@ -462,7 +460,6 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, bs->total_sectors = header.disk_size / SECTOR_SIZE; s->block_size = header.block_size; -s->block_sectors = header.block_size / SECTOR_SIZE; s->bmap_sector = header.offset_bmap / SECTOR_SIZE; s->header = header; @@ -508,23 +505,17 @@ static int vdi_reopen_prepare(BDRVReopenState *state, return 0; } -static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs, -int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int64_t coroutine_fn vdi_co_block_status(BlockDriverState *bs, +int64_t offset, int64_t bytes, int64_t *pnum, BlockDriverState **file) { -/* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */ BDRVVdiState *s = (BDRVVdiState *)bs->opaque; -size_t bmap_index = sector_num / s->block_sectors; -size_t sector_in_block = sector_num % s->block_sectors; -int n_sectors = s->block_sectors - sector_in_block; +size_t bmap_index = offset / s->block_size; +size_t index_in_block = offset % s->block_size; uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]); -uint64_t offset; int result; -logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum); -if (n_sectors > nb_sectors) { -n_sectors = nb_sectors; -} -*pnum = n_sectors; +logout("%p, %" PRId64 ", %" PRId64 ", %p\n", bs, offset, bytes, pnum); +*pnum = MIN(s->block_size, bytes); result = VDI_IS_ALLOCATED(bmap_entry); if (!result) { return 0; @@ -532,7 +523,7 @@ static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs, offset = s->header.offset_data + (uint64_t)bmap_entry * s->block_size + - sector_in_block * SECTOR_SIZE; + (index_in_block & BDRV_BLOCK_OFFSET_MASK); *file = bs->file->bs; return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; } @@ -901,7 +892,7 @@ static BlockDriver bdrv_vdi = { .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vdi_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, -.bdrv_co_get_block_status = vdi_co_get_block_status, +.bdrv_co_block_status = vdi_co_block_status, .bdrv_make_empty = vdi_make_empty, .bdrv_co_preadv = vdi_co_preadv, -- 2.9.3
[Qemu-block] [PATCH 18/31] mirror: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the mirror driver accordingly. Signed-off-by: Eric Blake--- block/mirror.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 750be1f..ebd0adf 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1027,14 +1027,14 @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) return bdrv_co_flush(bs->backing->bs); } -static int64_t coroutine_fn bdrv_mirror_top_get_block_status( -BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, +static int64_t coroutine_fn bdrv_mirror_top_block_status( +BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, BlockDriverState **file) { -*pnum = nb_sectors; +*pnum = bytes; *file = bs->backing->bs; return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | - (sector_num << BDRV_SECTOR_BITS); + (offset & BDRV_BLOCK_OFFSET_MASK); } static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, @@ -1083,7 +1083,7 @@ static BlockDriver bdrv_mirror_top = { .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes, .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, .bdrv_co_flush = bdrv_mirror_top_flush, -.bdrv_co_get_block_status = bdrv_mirror_top_get_block_status, +.bdrv_co_block_status = bdrv_mirror_top_block_status, .bdrv_refresh_filename = bdrv_mirror_top_refresh_filename, .bdrv_close = bdrv_mirror_top_close, .bdrv_child_perm= bdrv_mirror_top_child_perm, -- 2.9.3
[Qemu-block] [PATCH 16/31] iscsi: Switch iscsi_allocmap_update() to byte-based
We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Convert all uses of the allocmap (no semantic change). Callers that already had bytes available are simpler, and callers that now scale to bytes will be easier to switch to byte-based in the future. Signed-off-by: Eric Blake--- block/iscsi.c | 90 +-- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index 9648a45..e51fdfb 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -488,24 +488,22 @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags) } static void -iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num, - int nb_sectors, bool allocated, bool valid) +iscsi_allocmap_update(IscsiLun *iscsilun, int64_t offset, + int64_t bytes, bool allocated, bool valid) { int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk; -int cluster_sectors = iscsilun->cluster_size >> BDRV_SECTOR_BITS; if (iscsilun->allocmap == NULL) { return; } /* expand to entirely contain all affected clusters */ -assert(cluster_sectors); -cl_num_expanded = sector_num / cluster_sectors; -nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors, - cluster_sectors) - cl_num_expanded; +assert(iscsilun->cluster_size); +cl_num_expanded = offset / iscsilun->cluster_size; +nb_cls_expanded = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size) +- cl_num_expanded; /* shrink to touch only completely contained clusters */ -cl_num_shrunk = DIV_ROUND_UP(sector_num, cluster_sectors); -nb_cls_shrunk = (sector_num + nb_sectors) / cluster_sectors - - cl_num_shrunk; +cl_num_shrunk = DIV_ROUND_UP(offset, iscsilun->cluster_size); +nb_cls_shrunk = (offset + bytes) / iscsilun->cluster_size - cl_num_shrunk; if (allocated) { bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded); } else { @@ -528,26 +526,26 @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num, } static void -iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num, - int nb_sectors) +iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) { -iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true); +iscsi_allocmap_update(iscsilun, offset, bytes, true, true); } static void -iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num, - int nb_sectors) +iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) { /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update * is ignored, so this will in effect be an iscsi_allocmap_set_invalid. */ -iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true); +iscsi_allocmap_update(iscsilun, offset, bytes, false, true); } -static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num, - int nb_sectors) +static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) { -iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false); +iscsi_allocmap_update(iscsilun, offset, bytes, false, false); } static void iscsi_allocmap_invalidate(IscsiLun *iscsilun) @@ -561,34 +559,30 @@ static void iscsi_allocmap_invalidate(IscsiLun *iscsilun) } static inline bool -iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num, -int nb_sectors) +iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t offset, +int64_t bytes) { unsigned long size; if (iscsilun->allocmap == NULL) { return true; } assert(iscsilun->cluster_size); -size = DIV_ROUND_UP(sector_num + nb_sectors, -iscsilun->cluster_size >> BDRV_SECTOR_BITS); +size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size); return !(find_next_bit(iscsilun->allocmap, size, - sector_num * BDRV_SECTOR_SIZE / - iscsilun->cluster_size) == size); + offset / iscsilun->cluster_size) == size); } static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun, - int64_t sector_num, int nb_sectors) + int64_t offset, int64_t bytes) { unsigned long size; if (iscsilun->allocmap_valid == NULL) { return false; } assert(iscsilun->cluster_size); -size = DIV_ROUND_UP(sector_num + nb_sectors, -
[Qemu-block] [PATCH 19/31] null: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the null driver accordingly. Signed-off-by: Eric Blake--- block/null.c | 21 ++--- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/block/null.c b/block/null.c index b300390..2dc2dd7 100644 --- a/block/null.c +++ b/block/null.c @@ -204,22 +204,21 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state, return 0; } -static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file) +static int64_t coroutine_fn null_co_block_status(BlockDriverState *bs, + int64_t offset, + int64_t bytes, int64_t *pnum, + BlockDriverState **file) { BDRVNullState *s = bs->opaque; -off_t start = sector_num * BDRV_SECTOR_SIZE; +int64_t ret = BDRV_BLOCK_OFFSET_VALID | (offset & BDRV_BLOCK_OFFSET_MASK); -*pnum = nb_sectors; +*pnum = bytes; *file = bs; if (s->read_zeroes) { -return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO; -} else { -return BDRV_BLOCK_OFFSET_VALID | start; +ret |= BDRV_BLOCK_ZERO; } +return ret; } static void null_refresh_filename(BlockDriverState *bs, QDict *opts) @@ -250,7 +249,7 @@ static BlockDriver bdrv_null_co = { .bdrv_co_flush_to_disk = null_co_flush, .bdrv_reopen_prepare= null_reopen_prepare, -.bdrv_co_get_block_status = null_co_get_block_status, +.bdrv_co_block_status = null_co_block_status, .bdrv_refresh_filename = null_refresh_filename, }; @@ -269,7 +268,7 @@ static BlockDriver bdrv_null_aio = { .bdrv_aio_flush = null_aio_flush, .bdrv_reopen_prepare= null_reopen_prepare, -.bdrv_co_get_block_status = null_co_get_block_status, +.bdrv_co_block_status = null_co_block_status, .bdrv_refresh_filename = null_refresh_filename, }; -- 2.9.3
[Qemu-block] [PATCH 09/31] block: Switch bdrv_co_get_block_status_above() to byte-based
We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Convert another internal type (no semantic change), and rename it to match the corresponding public function rename. Signed-off-by: Eric Blake--- block/io.c | 42 -- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/block/io.c b/block/io.c index f7ece8d..10bc011 100644 --- a/block/io.c +++ b/block/io.c @@ -1819,11 +1819,11 @@ out: return ret; } -static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs, +static int64_t coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState *base, -int64_t sector_num, -int nb_sectors, -int *pnum, +int64_t offset, +int64_t bytes, +int64_t *pnum, BlockDriverState **file) { BlockDriverState *p; @@ -1831,41 +1831,32 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs, assert(bs != base); for (p = bs; p != base; p = backing_bs(p)) { -int64_t count; - -ret = bdrv_co_block_status(p, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, , - file); -*pnum = count >> BDRV_SECTOR_BITS; +ret = bdrv_co_block_status(p, offset, bytes, pnum, file); if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) { break; } -/* [sector_num, pnum] unallocated on this layer, which could be only - * the first part of [sector_num, nb_sectors]. */ -nb_sectors = MIN(nb_sectors, *pnum); +/* [offset, pnum] unallocated on this layer, which could be only + * the first part of [offset, bytes]. */ +bytes = MIN(bytes, *pnum); } return ret; } /* Coroutine wrapper for bdrv_get_block_status_above() */ -static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque) +static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque) { BdrvCoBlockStatusData *data = opaque; -int n; -data->ret = bdrv_co_get_block_status_above(data->bs, data->base, - data->offset >> BDRV_SECTOR_BITS, - data->bytes >> BDRV_SECTOR_BITS, - , - data->file); -*data->pnum = n * BDRV_SECTOR_SIZE; +data->ret = bdrv_co_block_status_above(data->bs, data->base, + data->offset, data->bytes, + data->pnum, data->file); data->done = true; } /* - * Synchronous wrapper around bdrv_co_get_block_status_above(). + * Synchronous wrapper around bdrv_co_block_status_above(). * - * See bdrv_co_get_block_status_above() for details. + * See bdrv_co_block_status_above() for details. */ int64_t bdrv_get_block_status_above(BlockDriverState *bs, BlockDriverState *base, @@ -1887,10 +1878,9 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, if (qemu_in_coroutine()) { /* Fast-path if already in coroutine context */ -bdrv_get_block_status_above_co_entry(); +bdrv_block_status_above_co_entry(); } else { -co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry, - ); +co = qemu_coroutine_create(bdrv_block_status_above_co_entry, ); bdrv_coroutine_enter(bs, co); BDRV_POLL_WHILE(bs, !data.done); } -- 2.9.3
[Qemu-block] [PATCH 13/31] file-posix: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the file protocol driver accordingly. Signed-off-by: Eric Blake--- block/file-posix.c | 47 +++ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 1941fb6..690bd45 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1832,22 +1832,22 @@ static int find_allocation(BlockDriverState *bs, off_t start, /* * Returns the allocation status of the specified sectors. * - * If 'sector_num' is beyond the end of the disk image the return value is 0 + * If 'offset' is beyond the end of the disk image the return value is 0 * and 'pnum' is set to 0. * - * 'pnum' is set to the number of sectors (including and immediately following - * the specified sector) that are known to be in the same + * 'pnum' is set to the number of bytes (including and immediately following + * the specified offset) that are known to be in the same * allocated/unallocated state. * - * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes + * 'bytes' is the max value 'pnum' should be set to. If bytes goes * beyond the end of the disk image it will be clamped. */ -static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, -int64_t sector_num, -int nb_sectors, int *pnum, -BlockDriverState **file) +static int64_t coroutine_fn raw_co_block_status(BlockDriverState *bs, +int64_t offset, +int64_t bytes, int64_t *pnum, +BlockDriverState **file) { -off_t start, data = 0, hole = 0; +off_t data = 0, hole = 0; int64_t total_size; int ret; @@ -1856,39 +1856,38 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, return ret; } -start = sector_num * BDRV_SECTOR_SIZE; total_size = bdrv_getlength(bs); if (total_size < 0) { return total_size; -} else if (start >= total_size) { +} else if (offset >= total_size) { *pnum = 0; return 0; -} else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { -nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); +} else if (offset + bytes > total_size) { +bytes = total_size - offset; } -ret = find_allocation(bs, start, , ); +ret = find_allocation(bs, offset, , ); if (ret == -ENXIO) { /* Trailing hole */ -*pnum = nb_sectors; +*pnum = bytes; ret = BDRV_BLOCK_ZERO; } else if (ret < 0) { /* No info available, so pretend there are no holes */ -*pnum = nb_sectors; +*pnum = bytes; ret = BDRV_BLOCK_DATA; -} else if (data == start) { -/* On a data extent, compute sectors to the end of the extent, +} else if (data == offset) { +/* On a data extent, compute bytes to the end of the extent, * possibly including a partial sector at EOF. */ -*pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); +*pnum = MIN(bytes, hole - offset); ret = BDRV_BLOCK_DATA; } else { -/* On a hole, compute sectors to the beginning of the next extent. */ -assert(hole == start); -*pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); +/* On a hole, compute bytes to the beginning of the next extent. */ +assert(hole == offset); +*pnum = MIN(bytes, data - offset); ret = BDRV_BLOCK_ZERO; } *file = bs; -return ret | BDRV_BLOCK_OFFSET_VALID | start; +return ret | BDRV_BLOCK_OFFSET_VALID | (offset & BDRV_BLOCK_OFFSET_MASK); } static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, @@ -1963,7 +1962,7 @@ BlockDriver bdrv_file = { .bdrv_close = raw_close, .bdrv_create = raw_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, -.bdrv_co_get_block_status = raw_co_get_block_status, +.bdrv_co_block_status = raw_co_block_status, .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, .bdrv_co_preadv = raw_co_preadv, -- 2.9.3
[Qemu-block] [PATCH 12/31] commit: Switch to .bdrv_co_block_status()
We are gradually moving away from sector-based interfaces, towards byte-based. Update the commit driver accordingly. Signed-off-by: Eric Blake--- block/commit.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/block/commit.c b/block/commit.c index 989de7d..1cc7a00 100644 --- a/block/commit.c +++ b/block/commit.c @@ -228,14 +228,14 @@ static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); } -static int64_t coroutine_fn bdrv_commit_top_get_block_status( -BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, +static int64_t coroutine_fn bdrv_commit_top_block_status( +BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, BlockDriverState **file) { -*pnum = nb_sectors; +*pnum = bytes; *file = bs->backing->bs; return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | - (sector_num << BDRV_SECTOR_BITS); + (offset & BDRV_BLOCK_OFFSET_MASK); } static void bdrv_commit_top_refresh_filename(BlockDriverState *bs, QDict *opts) @@ -263,7 +263,7 @@ static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, static BlockDriver bdrv_commit_top = { .format_name= "commit_top", .bdrv_co_preadv = bdrv_commit_top_preadv, -.bdrv_co_get_block_status = bdrv_commit_top_get_block_status, +.bdrv_co_block_status = bdrv_commit_top_block_status, .bdrv_refresh_filename = bdrv_commit_top_refresh_filename, .bdrv_close = bdrv_commit_top_close, .bdrv_child_perm= bdrv_commit_top_child_perm, -- 2.9.3
[Qemu-block] [PATCH 04/31] block: Switch bdrv_make_zero() to byte-based
We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Change the internal loop iteration of zeroing a device to track by bytes instead of sectors (although we are still guaranteed that we iterate by steps that are sector-aligned). Signed-off-by: Eric Blake--- block/io.c | 32 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/block/io.c b/block/io.c index 07165dc..1f8ae81 100644 --- a/block/io.c +++ b/block/io.c @@ -666,39 +666,39 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, */ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) { -int64_t target_sectors, ret, nb_sectors, sector_num = 0; +int64_t target_size, ret, bytes, offset = 0; BlockDriverState *bs = child->bs; BlockDriverState *file; -int n; +int n; /* sectors */ -target_sectors = bdrv_nb_sectors(bs); -if (target_sectors < 0) { -return target_sectors; +target_size = bdrv_getlength(bs); +if (target_size < 0) { +return target_size; } for (;;) { -nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); -if (nb_sectors <= 0) { +bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES); +if (bytes <= 0) { return 0; } -ret = bdrv_get_block_status(bs, sector_num, nb_sectors, , ); +ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS, +bytes >> BDRV_SECTOR_BITS, , ); if (ret < 0) { -error_report("error getting block status at sector %" PRId64 ": %s", - sector_num, strerror(-ret)); +error_report("error getting block status at offset %" PRId64 ": %s", + offset, strerror(-ret)); return ret; } if (ret & BDRV_BLOCK_ZERO) { -sector_num += n; +offset += n * BDRV_SECTOR_BITS; continue; } -ret = bdrv_pwrite_zeroes(child, sector_num << BDRV_SECTOR_BITS, - n << BDRV_SECTOR_BITS, flags); +ret = bdrv_pwrite_zeroes(child, offset, n * BDRV_SECTOR_SIZE, flags); if (ret < 0) { -error_report("error writing zeroes at sector %" PRId64 ": %s", - sector_num, strerror(-ret)); +error_report("error writing zeroes at offset %" PRId64 ": %s", + offset, strerror(-ret)); return ret; } -sector_num += n; +offset += n * BDRV_SECTOR_SIZE; } } -- 2.9.3
[Qemu-block] [PATCH 08/31] block: Switch BdrvCoGetBlockStatusData to byte-based
We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Convert another internal type (no semantic change), and rename it to match the corresponding public function rename. Signed-off-by: Eric Blake--- block/io.c | 30 +- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/block/io.c b/block/io.c index e804bdd..f7ece8d 100644 --- a/block/io.c +++ b/block/io.c @@ -1680,16 +1680,16 @@ int bdrv_flush_all(void) } -typedef struct BdrvCoGetBlockStatusData { +typedef struct BdrvCoBlockStatusData { BlockDriverState *bs; BlockDriverState *base; BlockDriverState **file; -int64_t sector_num; -int nb_sectors; -int *pnum; +int64_t offset; +int64_t bytes; +int64_t *pnum; int64_t ret; bool done; -} BdrvCoGetBlockStatusData; +} BdrvCoBlockStatusData; /* * Returns the allocation status of the specified sectors. @@ -1850,13 +1850,15 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs, /* Coroutine wrapper for bdrv_get_block_status_above() */ static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque) { -BdrvCoGetBlockStatusData *data = opaque; +BdrvCoBlockStatusData *data = opaque; +int n; data->ret = bdrv_co_get_block_status_above(data->bs, data->base, - data->sector_num, - data->nb_sectors, - data->pnum, + data->offset >> BDRV_SECTOR_BITS, + data->bytes >> BDRV_SECTOR_BITS, + , data->file); +*data->pnum = n * BDRV_SECTOR_SIZE; data->done = true; } @@ -1872,13 +1874,14 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, BlockDriverState **file) { Coroutine *co; -BdrvCoGetBlockStatusData data = { +int64_t n; +BdrvCoBlockStatusData data = { .bs = bs, .base = base, .file = file, -.sector_num = sector_num, -.nb_sectors = nb_sectors, -.pnum = pnum, +.offset = sector_num * BDRV_SECTOR_SIZE, +.bytes = nb_sectors * BDRV_SECTOR_SIZE, +.pnum = , .done = false, }; @@ -1891,6 +1894,7 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, bdrv_coroutine_enter(bs, co); BDRV_POLL_WHILE(bs, !data.done); } +*pnum = n >> BDRV_SECTOR_BITS; return data.ret; } -- 2.9.3
[Qemu-block] [PATCH 06/31] block: Convert bdrv_get_block_status() to bytes
We are gradually moving away from sector-based interfaces, towards byte-based. In the common case, allocation is unlikely to ever use values that are not naturally sector-aligned, but it is possible that byte-based values will let us be more precise about allocation at the end of an unaligned file that can do byte-based access. Changing the name of the function from bdrv_get_block_status() to bdrv_block_status() ensures that the compiler enforces that all callers are updated. For now, the io.c layer still assert()s that all callers are sector-aligned, but that can be relaxed when a later patch implements byte-based block status in the drivers. Note that we have an inherent limitation in the BDRV_BLOCK_* return values: BDRV_BLOCK_OFFSET_VALID can only return the start of a sector, even if we later relax the interface to query for the status starting at an intermediate byte; document the obvious interpretation that valid offsets are always sector-relative. Therefore, for the most part this patch is just the addition of scaling at the callers followed by inverse scaling at bdrv_block_status(). But some code, particularly bdrv_is_allocated(), gets a lot simpler because it no longer has to mess with sectors; also, it is now possible to pass NULL if the caller does not care how much of the image is allocated beyond the initial offset. For ease of review, bdrv_get_block_status_above() will be tackled separately. Signed-off-by: Eric Blake--- include/block/block.h | 15 +-- block/io.c| 51 ++- block/qcow2-cluster.c | 2 +- qemu-img.c| 19 ++- 4 files changed, 46 insertions(+), 41 deletions(-) diff --git a/include/block/block.h b/include/block/block.h index eed1330..b9e7281 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -121,10 +121,10 @@ typedef struct HDGeometry { /* * Allocation status flags - * BDRV_BLOCK_DATA: data is read from a file returned by bdrv_get_block_status. + * BDRV_BLOCK_DATA: data is read from a file returned by bdrv_block_status. * BDRV_BLOCK_ZERO: sectors read as zero * BDRV_BLOCK_OFFSET_VALID: sector stored as raw data in a file returned by - * bdrv_get_block_status. + * bdrv_block_status. * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this * layer (as opposed to the backing file) * BDRV_BLOCK_RAW: used internally to indicate that the request @@ -132,7 +132,10 @@ typedef struct HDGeometry { * should look in bs->file directly. * * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 represent the offset in - * bs->file where sector data can be read from as raw data. + * bs->file that begins the sector containing the address in question, + * where the sector can be read from as raw data with individual bytes + * at the same sector-relative locations (and thus, this bit cannot be + * set for mappings which are not equivalent modulo 512). * * DATA == 0 && ZERO == 0 means that data is read from backing_hd if present. * @@ -414,9 +417,9 @@ int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); -int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file); +int64_t bdrv_block_status(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum, + BlockDriverState **file); int64_t bdrv_get_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t sector_num, diff --git a/block/io.c b/block/io.c index 1f8ae81..5cdb1f0 100644 --- a/block/io.c +++ b/block/io.c @@ -669,7 +669,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) int64_t target_size, ret, bytes, offset = 0; BlockDriverState *bs = child->bs; BlockDriverState *file; -int n; /* sectors */ target_size = bdrv_getlength(bs); if (target_size < 0) { @@ -681,24 +680,23 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) if (bytes <= 0) { return 0; } -ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS, -bytes >> BDRV_SECTOR_BITS, , ); +ret = bdrv_block_status(bs, offset, bytes, , ); if (ret < 0) { error_report("error getting block status at offset %" PRId64 ": %s", offset, strerror(-ret)); return ret; } if (ret & BDRV_BLOCK_ZERO) { -offset += n * BDRV_SECTOR_BITS; +offset += bytes; continue;
[Qemu-block] [PATCH 07/31] block: Switch bdrv_co_get_block_status() to byte-based
We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Convert another internal function (no semantic change), and as with its public counterpart, rename to bdrv_co_block_status() to make the compiler enforce that we catch all uses. For now, we assert that callers still pass aligned data, but ultimately, this will be the function where we hand off to a byte-based driver callback, and will eventually need to add logic to ensure we round calls according to the driver's request_alignment then touch up the result handed back to the caller, if we start permitting a caller to pass unaligned offsets. While at it, adjust the function to accept NULL for pnum or file, while still guaranteeing the driver callback has a non-NULL pointer to write into. Signed-off-by: Eric Blake--- block/io.c | 88 +- 1 file changed, 53 insertions(+), 35 deletions(-) diff --git a/block/io.c b/block/io.c index 5cdb1f0..e804bdd 100644 --- a/block/io.c +++ b/block/io.c @@ -1696,69 +1696,83 @@ typedef struct BdrvCoGetBlockStatusData { * Drivers not implementing the functionality are assumed to not support * backing files, hence all their sectors are reported as allocated. * - * If 'sector_num' is beyond the end of the disk image the return value is 0 + * If 'offset' is beyond the end of the disk image the return value is 0 * and 'pnum' is set to 0. * - * 'pnum' is set to the number of sectors (including and immediately following - * the specified sector) that are known to be in the same - * allocated/unallocated state. + * 'pnum' is set to the number of bytes (including and immediately following + * the specified offset) that are known to be in the same + * allocated/unallocated state. It may be NULL. * - * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes + * 'bytes' is the max value 'pnum' should be set to. If bytes goes * beyond the end of the disk image it will be clamped. * - * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file' - * points to the BDS which the sector range is allocated in. + * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, and + * 'file' is not NULL, then *file points to the BDS which owns the allocated + * sector that contains offset. */ -static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file) +static int64_t coroutine_fn bdrv_co_block_status(BlockDriverState *bs, + int64_t offset, int64_t bytes, + int64_t *pnum, + BlockDriverState **file) { -int64_t total_sectors; -int64_t n; +int64_t total_size; +int64_t n; /* bytes */ int64_t ret, ret2; +int count; /* sectors */ +BlockDriverState *tmp_file; -total_sectors = bdrv_nb_sectors(bs); -if (total_sectors < 0) { -return total_sectors; +total_size = bdrv_getlength(bs); +if (total_size < 0) { +return total_size; } -if (sector_num >= total_sectors) { +if (!pnum) { +pnum = +} +if (offset >= total_size) { *pnum = 0; return 0; } -n = total_sectors - sector_num; -if (n < nb_sectors) { -nb_sectors = n; +n = total_size - offset; +if (n < bytes) { +bytes = n; } if (!bs->drv->bdrv_co_get_block_status) { -*pnum = nb_sectors; +*pnum = bytes; ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; if (bs->drv->protocol_name) { -ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); +ret |= BDRV_BLOCK_OFFSET_VALID | (offset & BDRV_BLOCK_OFFSET_MASK); } return ret; } +if (!file) { +file = _file; +} *file = NULL; bdrv_inc_in_flight(bs); -ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum, +/* TODO: Rather than require aligned offsets, we could instead + * round to the driver's request_alignment here, then touch up + * count afterwards back to the caller's expectations. But first + * we want to switch the driver callback to likewise be + * byte-based. */ +assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); +ret = bs->drv->bdrv_co_get_block_status(bs, offset >> BDRV_SECTOR_BITS, +bytes >> BDRV_SECTOR_BITS, , file); if (ret < 0) { *pnum = 0; goto out; } +*pnum = count * BDRV_SECTOR_SIZE; if (ret & BDRV_BLOCK_RAW) { -
[Qemu-block] [PATCH 01/31] block: Drop unused bdrv_round_sectors_to_clusters()
Now that the last user [mirror_iteration()] has converted to using bytes, we no longer need a function to round sectors to clusters. Signed-off-by: Eric Blake--- include/block/block.h | 4 block/io.c| 21 - 2 files changed, 25 deletions(-) diff --git a/include/block/block.h b/include/block/block.h index 740cb86..86ad511 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -467,10 +467,6 @@ const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); int bdrv_get_flags(BlockDriverState *bs); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs); -void bdrv_round_sectors_to_clusters(BlockDriverState *bs, -int64_t sector_num, int nb_sectors, -int64_t *cluster_sector_num, -int *cluster_nb_sectors); void bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, unsigned int bytes, int64_t *cluster_offset, diff --git a/block/io.c b/block/io.c index d22d35f..d61a906 100644 --- a/block/io.c +++ b/block/io.c @@ -419,27 +419,6 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) } /** - * Round a region to cluster boundaries (sector-based) - */ -void bdrv_round_sectors_to_clusters(BlockDriverState *bs, -int64_t sector_num, int nb_sectors, -int64_t *cluster_sector_num, -int *cluster_nb_sectors) -{ -BlockDriverInfo bdi; - -if (bdrv_get_info(bs, ) < 0 || bdi.cluster_size == 0) { -*cluster_sector_num = sector_num; -*cluster_nb_sectors = nb_sectors; -} else { -int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE; -*cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c); -*cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num + -nb_sectors, c); -} -} - -/** * Round a region to cluster boundaries */ void bdrv_round_to_clusters(BlockDriverState *bs, -- 2.9.3
[Qemu-block] [PATCH 00/31] make bdrv_get_block_status byte-based
There are patches floating around to add NBD_CMD_BLOCK_STATUS, but NBD wants to report status on byte granularity (even if the reporting will probably be naturally aligned to sectors or even much higher levels). I've therefore started the task of converting our block status code to report at a byte granularity rather than sectors. This is part three of that conversion: dirty-bitmap. Earlier parts have been (mostly) reviewed, for bdrv_is_allocated and dirty-bitmaps. Perhaps I could have split this in two; patches 1-10 vs. 11-31 make a nice division of labor. Available as a tag at: git fetch git://repo.or.cz/qemu/ericb.git nbd-byte-status-v1 It requires the following (v1 of bdrv_is_allocated, v1 of dirty-bitmap, v9 of blkdebug, and Max's block-next tree): https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg01995.html https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg01723.html https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg01298.html https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg02163.html The diffstat shows no net change in total lines - but I know that the new code has more lines of comments than the old ;) I still haven't felt like tackling the task of rewriting migration/block.c to use bytes (instead of sectors) everywhere - that might give another net win in lines of code and legibility, but I also know it would conflict with some of the refactoring work that Juan is currently posting for review. Eric Blake (31): block: Drop unused bdrv_round_sectors_to_clusters() block: Make bdrv_round_to_clusters() signature more useful qcow2: Switch is_zero_sectors() to byte-based block: Switch bdrv_make_zero() to byte-based qemu-img: Switch get_block_status() to byte-based block: Convert bdrv_get_block_status() to bytes block: Switch bdrv_co_get_block_status() to byte-based block: Switch BdrvCoGetBlockStatusData to byte-based block: Switch bdrv_co_get_block_status_above() to byte-based block: Convert bdrv_get_block_status_above() to bytes block: Add .bdrv_co_block_status() callback commit: Switch to .bdrv_co_block_status() file-posix: Switch to .bdrv_co_block_status() gluster: Switch to .bdrv_co_block_status() iscsi: Switch cluster_sectors to byte-based iscsi: Switch iscsi_allocmap_update() to byte-based iscsi: Switch to .bdrv_co_block_status() mirror: Switch to .bdrv_co_block_status() null: Switch to .bdrv_co_block_status() parallels: Switch to .bdrv_co_block_status() qcow: Switch to .bdrv_co_block_status() qcow2: Switch to .bdrv_co_block_status() qed: Switch to .bdrv_co_block_status() raw: Switch to .bdrv_co_block_status() sheepdog: Switch to .bdrv_co_block_status() vdi: Avoid bitrot of debugging code vdi: Switch to .bdrv_co_block_status() vmdk: Switch to .bdrv_co_block_status() vpc: Switch to .bdrv_co_block_status() vvfat: Switch to .bdrv_co_block_status() block: Drop unused .bdrv_co_get_block_status() include/block/block.h | 33 +++ include/block/block_int.h | 13 ++- block/commit.c| 10 +- block/file-posix.c| 47 + block/gluster.c | 47 + block/io.c| 243 ++ block/iscsi.c | 144 ++- block/mirror.c| 33 +++ block/null.c | 21 ++-- block/parallels.c | 15 +-- block/qcow.c | 22 +++-- block/qcow2-cluster.c | 2 +- block/qcow2.c | 51 +- block/qed.c | 22 ++--- block/raw-format.c| 16 +-- block/sheepdog.c | 23 +++-- block/vdi.c | 37 --- block/vmdk.c | 24 ++--- block/vpc.c | 31 +++--- block/vvfat.c | 12 +-- qemu-img.c| 70 ++--- block/trace-events| 2 +- 22 files changed, 459 insertions(+), 459 deletions(-) -- 2.9.3
[Qemu-block] [PATCH 03/31] qcow2: Switch is_zero_sectors() to byte-based
We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Convert another internal function (no semantic change), and rename it to is_zero_above() in the process. Signed-off-by: Eric Blake--- block/qcow2.c | 32 ++-- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 4d34610..fe4ccf6 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2442,23 +2442,30 @@ finish: } -static bool is_zero_sectors(BlockDriverState *bs, int64_t start, -uint32_t count) +static bool is_zero_above(BlockDriverState *bs, int64_t offset, int64_t bytes) { int nr; BlockDriverState *file; int64_t res; +int64_t start; -if (start + count > bs->total_sectors) { -count = bs->total_sectors - start; +/* Widen to sector boundaries, then clamp to image length, before + * checking status of underlying sectors */ +start = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE); +bytes = QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE) - start; + +if (start + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { +bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset; } -if (!count) { +if (!bytes) { return true; } -res = bdrv_get_block_status_above(bs, NULL, start, count, +res = bdrv_get_block_status_above(bs, NULL, start >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, , ); -return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == count; +return res >= 0 && (res & BDRV_BLOCK_ZERO) && +nr * BDRV_SECTOR_SIZE == bytes; } static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, @@ -2476,24 +2483,21 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, } if (head || tail) { -int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS; uint64_t off; unsigned int nr; assert(head + count <= s->cluster_size); /* check whether remainder of cluster already reads as zero */ -if (!(is_zero_sectors(bs, cl_start, - DIV_ROUND_UP(head, BDRV_SECTOR_SIZE)) && - is_zero_sectors(bs, (offset + count) >> BDRV_SECTOR_BITS, - DIV_ROUND_UP(-tail & (s->cluster_size - 1), - BDRV_SECTOR_SIZE { +if (!(is_zero_above(bs, offset - head, head) && + is_zero_above(bs, offset + count, +tail ? s->cluster_size - tail : 0))) { return -ENOTSUP; } qemu_co_mutex_lock(>lock); /* We can have new write after previous check */ -offset = cl_start << BDRV_SECTOR_BITS; +offset = QEMU_ALIGN_DOWN(offset, s->cluster_size); count = s->cluster_size; nr = s->cluster_size; ret = qcow2_get_cluster_offset(bs, offset, , ); -- 2.9.3
[Qemu-block] [PATCH 05/31] qemu-img: Switch get_block_status() to byte-based
We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Continue by converting an internal function (no semantic change), and simplifying its caller accordingly. Signed-off-by: Eric Blake--- qemu-img.c | 24 +++- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index d96b4d6..8cb5165 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2662,14 +2662,16 @@ static void dump_map_entry(OutputFormat output_format, MapEntry *e, } } -static int get_block_status(BlockDriverState *bs, int64_t sector_num, -int nb_sectors, MapEntry *e) +static int get_block_status(BlockDriverState *bs, int64_t offset, +int64_t bytes, MapEntry *e) { int64_t ret; int depth; BlockDriverState *file; bool has_offset; +int nb_sectors = bytes >> BDRV_SECTOR_BITS; +assert(bytes < INT_MAX); /* As an optimization, we could cache the current range of unallocated * clusters in each file of the chain, and avoid querying the same * range repeatedly. @@ -2677,8 +2679,8 @@ static int get_block_status(BlockDriverState *bs, int64_t sector_num, depth = 0; for (;;) { -ret = bdrv_get_block_status(bs, sector_num, nb_sectors, _sectors, -); +ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS, nb_sectors, +_sectors, ); if (ret < 0) { return ret; } @@ -2698,7 +2700,7 @@ static int get_block_status(BlockDriverState *bs, int64_t sector_num, has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID); *e = (MapEntry) { -.start = sector_num * BDRV_SECTOR_SIZE, +.start = offset, .length = nb_sectors * BDRV_SECTOR_SIZE, .data = !!(ret & BDRV_BLOCK_DATA), .zero = !!(ret & BDRV_BLOCK_ZERO), @@ -2823,16 +2825,12 @@ static int img_map(int argc, char **argv) length = blk_getlength(blk); while (curr.start + curr.length < length) { -int64_t nsectors_left; -int64_t sector_num; -int n; - -sector_num = (curr.start + curr.length) >> BDRV_SECTOR_BITS; +int64_t offset = curr.start + curr.length; +int64_t n; /* Probe up to 1 GiB at a time. */ -nsectors_left = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE) - sector_num; -n = MIN(1 << (30 - BDRV_SECTOR_BITS), nsectors_left); -ret = get_block_status(bs, sector_num, n, ); +n = QEMU_ALIGN_DOWN(MIN(1 << 30, length - offset), BDRV_SECTOR_SIZE); +ret = get_block_status(bs, offset, n, ); if (ret < 0) { error_report("Could not read file metadata: %s", strerror(-ret)); -- 2.9.3
[Qemu-block] [PATCH 02/31] block: Make bdrv_round_to_clusters() signature more useful
In the process of converting sector-based interfaces to bytes, I'm finding it easier to represent a byte count as a 64-bit integer at the block layer (even if we are internally capped by SIZE_MAX or even INT_MAX for individual transactions, it's still nicer to not have to worry about truncation/overflow issues on as many variables). Update the signature of bdrv_round_to_clusters() to uniformly use uint64_t, matching the signature already chosen for bdrv_is_allocated, and adjust clients according to the required fallout. Signed-off-by: Eric Blake--- include/block/block.h | 4 ++-- block/io.c| 7 --- block/mirror.c| 9 - block/trace-events| 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/block/block.h b/include/block/block.h index 86ad511..eed1330 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -468,9 +468,9 @@ int bdrv_get_flags(BlockDriverState *bs); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs); void bdrv_round_to_clusters(BlockDriverState *bs, -int64_t offset, unsigned int bytes, +int64_t offset, int64_t bytes, int64_t *cluster_offset, -unsigned int *cluster_bytes); +int64_t *cluster_bytes); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, diff --git a/block/io.c b/block/io.c index d61a906..07165dc 100644 --- a/block/io.c +++ b/block/io.c @@ -422,9 +422,9 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) * Round a region to cluster boundaries */ void bdrv_round_to_clusters(BlockDriverState *bs, -int64_t offset, unsigned int bytes, +int64_t offset, int64_t bytes, int64_t *cluster_offset, -unsigned int *cluster_bytes) +int64_t *cluster_bytes) { BlockDriverInfo bdi; @@ -920,7 +920,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, struct iovec iov; QEMUIOVector bounce_qiov; int64_t cluster_offset; -unsigned int cluster_bytes; +int64_t cluster_bytes; size_t skip_bytes; int ret; @@ -941,6 +941,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, cluster_offset, cluster_bytes); +assert(cluster_bytes < SIZE_MAX); iov.iov_len = cluster_bytes; iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); if (bounce_buffer == NULL) { diff --git a/block/mirror.c b/block/mirror.c index 846e392..2510793 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -177,7 +177,7 @@ static void mirror_read_complete(void *opaque, int ret) /* Clip bytes relative to offset to not exceed end-of-file */ static inline void mirror_clip_bytes(MirrorBlockJob *s, int64_t offset, - unsigned int *bytes) + int64_t *bytes) { *bytes = MIN(*bytes, s->bdev_length - offset); } @@ -190,10 +190,9 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, bool need_cow; int ret = 0; int64_t align_offset = *offset; -unsigned int align_bytes = *bytes; +int64_t align_bytes = *bytes; int max_bytes = s->granularity * s->max_iov; -assert(*bytes < INT_MAX); need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap); need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity, s->cow_bitmap); @@ -384,7 +383,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) while (nb_chunks > 0 && offset < s->bdev_length) { int64_t ret; int io_sectors; -unsigned int io_bytes; +int64_t io_bytes; int64_t io_bytes_acct; BlockDriverState *file; enum MirrorMethod { @@ -410,7 +409,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) io_bytes = s->granularity; } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { int64_t target_offset; -unsigned int target_bytes; +int64_t target_bytes; bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes, _offset, _bytes); if (target_offset == offset && diff --git a/block/trace-events b/block/trace-events index 04f6463..2301a50 100644 --- a/block/trace-events +++ b/block/trace-events @@ -15,7 +15,7 @@ bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p
Re: [Qemu-block] [Qemu-devel] [PULL 2/8] replication: clarify permissions
On 03/17/2017 08:15 AM, Kevin Wolf wrote: > From: Changlong Xie> > Even if hidden_disk, secondary_disk are backing files, they all need > write permissions in replication scenario. Otherwise we will encouter > below exceptions on secondary side during adding nbd server: > > {'execute': 'nbd-server-add', 'arguments': {'device': 'colo-disk', > 'writable': true } } > {"error": {"class": "GenericError", "desc": "Conflicts with use by > hidden-qcow2-driver as 'backing', which does not allow 'write' on > sec-qcow2-driver-for-nbd"}} > > CC: Zhang Hailiang > CC: Zhang Chen > CC: Wen Congyang This address for Wen Congyang is different than the one listed in MAINTAINERS for replication (M: Wen Congyang ), and different still from addresses my mailer has harvested from other posts (wencongy...@gmail.com). The MAINTAINERS entry is now resulting in 'undelivered mail' bounce messages, can you please submit an update to MAINTAINERS with your new preferred address? [or gently correct me if I'm confusing two people with the same name?] -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH 00/17] make bdrv_is_allocated[_above] byte-based
On 04/17/2017 06:42 PM, John Snow wrote: > > > On 04/11/2017 06:29 PM, Eric Blake wrote: >> There are patches floating around to add NBD_CMD_BLOCK_STATUS, >> but NBD wants to report status on byte granularity (even if the >> reporting will probably be naturally aligned to sectors or even >> much higher levels). I've therefore started the task of >> converting our block status code to report at a byte granularity >> rather than sectors. >> >> This is part one of that conversion: bdrv_is_allocated(). >> Other parts (still to be written) include tracking dirty bitmaps >> by bytes (it's still one bit per granularity, but now we won't >> be double-scaling from bytes to sectors to granularity), That series is not only written, but reviewed now: https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg02163.html >> then >> replacing bdrv_get_block_status() with a byte based callback >> in all the drivers. Coming up soon. >> >> Available as a tag at: >> git fetch git://repo.or.cz/qemu/ericb.git nbd-byte-allocated-v1 >> >> It requires v9 or later of my prior work on blkdebug: >> https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg01723.html >> which in turn requires Max's block-next tree: >> https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg01298.html >> >> include/block/block.h| 6 +- >> include/block/block_backup.h | 11 +- >> include/qemu/ratelimit.h | 3 +- >> block/backup.c | 126 -- >> block/commit.c | 54 >> block/io.c | 59 + >> block/mirror.c | 300 >> ++- >> block/replication.c | 29 +++-- >> block/stream.c | 35 +++-- >> block/vvfat.c| 34 +++-- >> migration/block.c| 9 +- >> qemu-img.c | 15 ++- >> qemu-io-cmds.c | 57 >> block/trace-events | 14 +- >> 14 files changed, 381 insertions(+), 371 deletions(-) > > Shame, you added ten lines! Yeah, some of that back-and-forth scaling is verbose and resulted in line breaks that used to fit in one. Of course, the second series regained those ten lines and more, once I got to flatten some of the interfaces away from repeated scaling: $ git diff --stat nbd-byte-allocated-v1..nbd-byte-dirty-v1 | cat include/block/block_int.h| 2 +- include/block/dirty-bitmap.h | 21 --- block/backup.c | 7 ++-- block/dirty-bitmap.c | 83 block/io.c | 6 ++-- block/mirror.c | 73 +- migration/block.c| 14 7 files changed, 74 insertions(+), 132 deletions(-) > >> > > Patches 1-15: > > Reviewed-by: John Snow> > 9: Is there a good reason for a void fn() to return its argument via a > passed parameter? I see you're matching the other interface, but that > strikes me as wonky. It bothered me a bit too; beyond the copy-and-paste factor, my justification was that the parameter is both input and output, rather than output-only. But I don't mind respinning to add a preliminary patch that fixes mirror_clip_sectors() to return a value, then pattern mirror_clip_bytes to do likewise. > > 11: Looks correct to me, but this one's a bit hairier than the rest. How > many times do we truly need to round, adjust, clip, round again, align, > clip, round, align, ... I don't know that there are that many rounds of clipping going on, but there is definitely a lot of scaling, and it gets better as later patches make even more things be byte-based. > > I'll take a peek at the last two tomorrow. Thanks for plodding through it so far. For supposedly being no semantic change, it is still definitely a lot to think about. But the end result is more legible, in my opinion. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH 13/17] backup: Switch block_backup.h to byte-based
On 04/17/2017 06:24 PM, John Snow wrote: > > > On 04/11/2017 06:29 PM, Eric Blake wrote: >> We are gradually converting to byte-based interfaces, as they are >> easier to reason about than sector-based. Continue by converting >> the public interface to backup jobs (no semantic change), including >> a change to CowRequest to track by bytes instead of cluster indices. >> >> Signed-off-by: Eric Blake>> --- >> >> -void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num, >> - int nb_sectors); >> +void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset, >> + uint64_t bytes); >> void backup_cow_request_begin(CowRequest *req, BlockJob *job, >> - int64_t sector_num, >> - int nb_sectors); >> + int64_t offset, uint64_t bytes); >> void backup_cow_request_end(CowRequest *req); > > Should we adjust the parameter names of cow_request_begin and > wait_for_overlapping_requests, too? Sure, I can do that. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH 00/17] make bdrv_is_allocated[_above] byte-based
On 04/11/2017 06:29 PM, Eric Blake wrote: > There are patches floating around to add NBD_CMD_BLOCK_STATUS, > but NBD wants to report status on byte granularity (even if the > reporting will probably be naturally aligned to sectors or even > much higher levels). I've therefore started the task of > converting our block status code to report at a byte granularity > rather than sectors. > > This is part one of that conversion: bdrv_is_allocated(). > Other parts (still to be written) include tracking dirty bitmaps > by bytes (it's still one bit per granularity, but now we won't > be double-scaling from bytes to sectors to granularity), then > replacing bdrv_get_block_status() with a byte based callback > in all the drivers. > > Available as a tag at: > git fetch git://repo.or.cz/qemu/ericb.git nbd-byte-allocated-v1 > > It requires v9 or later of my prior work on blkdebug: > https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg01723.html > which in turn requires Max's block-next tree: > https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg01298.html > > Eric Blake (17): > blockjob: Track job ratelimits via bytes, not sectors > trace: Show blockjob actions via bytes, not sectors > stream: Switch stream_populate() to byte-based > stream: Switch stream_run() to byte-based > commit: Switch commit_populate() to byte-based > commit: Switch commit_run() to byte-based > mirror: Switch MirrorBlockJob to byte-based > mirror: Switch mirror_do_zero_or_discard() to byte-based > mirror: Switch mirror_cow_align() to byte-based > mirror: Switch mirror_do_read() to byte-based > mirror: Switch mirror_iteration() to byte-based > backup: Switch BackupBlockJob to byte-based > backup: Switch block_backup.h to byte-based > backup: Switch backup_do_cow() to byte-based > backup: Switch backup_run() to byte-based > block: Make bdrv_is_allocated() byte-based > block: Make bdrv_is_allocated_above() byte-based > > include/block/block.h| 6 +- > include/block/block_backup.h | 11 +- > include/qemu/ratelimit.h | 3 +- > block/backup.c | 126 -- > block/commit.c | 54 > block/io.c | 59 + > block/mirror.c | 300 > ++- > block/replication.c | 29 +++-- > block/stream.c | 35 +++-- > block/vvfat.c| 34 +++-- > migration/block.c| 9 +- > qemu-img.c | 15 ++- > qemu-io-cmds.c | 57 > block/trace-events | 14 +- > 14 files changed, 381 insertions(+), 371 deletions(-) Shame, you added ten lines! > Patches 1-15: Reviewed-by: John Snow9: Is there a good reason for a void fn() to return its argument via a passed parameter? I see you're matching the other interface, but that strikes me as wonky. 11: Looks correct to me, but this one's a bit hairier than the rest. How many times do we truly need to round, adjust, clip, round again, align, clip, round, align, ... I'll take a peek at the last two tomorrow. --js
Re: [Qemu-block] [PATCH 13/17] backup: Switch block_backup.h to byte-based
On 04/11/2017 06:29 PM, Eric Blake wrote: > We are gradually converting to byte-based interfaces, as they are > easier to reason about than sector-based. Continue by converting > the public interface to backup jobs (no semantic change), including > a change to CowRequest to track by bytes instead of cluster indices. > > Signed-off-by: Eric Blake> --- > include/block/block_backup.h | 11 +-- > block/backup.c | 29 ++--- > block/replication.c | 12 > 3 files changed, 27 insertions(+), 25 deletions(-) > > diff --git a/include/block/block_backup.h b/include/block/block_backup.h > index 8a75947..994a3bd 100644 > --- a/include/block/block_backup.h > +++ b/include/block/block_backup.h > @@ -21,17 +21,16 @@ > #include "block/block_int.h" > > typedef struct CowRequest { > -int64_t start; > -int64_t end; > +int64_t start_byte; > +int64_t end_byte; > QLIST_ENTRY(CowRequest) list; > CoQueue wait_queue; /* coroutines blocked on this request */ > } CowRequest; > > -void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num, > - int nb_sectors); > +void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset, > + uint64_t bytes); > void backup_cow_request_begin(CowRequest *req, BlockJob *job, > - int64_t sector_num, > - int nb_sectors); > + int64_t offset, uint64_t bytes); > void backup_cow_request_end(CowRequest *req); Should we adjust the parameter names of cow_request_begin and wait_for_overlapping_requests, too? > > void backup_do_checkpoint(BlockJob *job, Error **errp); > diff --git a/block/backup.c b/block/backup.c > index a64a162..0502c1a 100644 > --- a/block/backup.c > +++ b/block/backup.c > @@ -64,7 +64,7 @@ static void coroutine_fn > wait_for_overlapping_requests(BackupBlockJob *job, > do { > retry = false; > QLIST_FOREACH(req, >inflight_reqs, list) { > -if (end > req->start && start < req->end) { > +if (end > req->start_byte && start < req->end_byte) { > qemu_co_queue_wait(>wait_queue, NULL); > retry = true; > break; > @@ -77,8 +77,8 @@ static void coroutine_fn > wait_for_overlapping_requests(BackupBlockJob *job, > static void cow_request_begin(CowRequest *req, BackupBlockJob *job, > int64_t start, int64_t end) > { > -req->start = start; > -req->end = end; > +req->start_byte = start; > +req->end_byte = end; > qemu_co_queue_init(>wait_queue); > QLIST_INSERT_HEAD(>inflight_reqs, req, list); > } > @@ -114,8 +114,10 @@ static int coroutine_fn backup_do_cow(BackupBlockJob > *job, >sector_num * BDRV_SECTOR_SIZE, >nb_sectors * BDRV_SECTOR_SIZE); > > -wait_for_overlapping_requests(job, start, end); > -cow_request_begin(_request, job, start, end); > +wait_for_overlapping_requests(job, start * job->cluster_size, > + end * job->cluster_size); > +cow_request_begin(_request, job, start * job->cluster_size, > + end * job->cluster_size); > > for (; start < end; start++) { > if (test_bit(start, job->done_bitmap)) { > @@ -277,32 +279,29 @@ void backup_do_checkpoint(BlockJob *job, Error **errp) > bitmap_zero(backup_job->done_bitmap, len); > } > > -void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num, > - int nb_sectors) > +void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset, > + uint64_t bytes) > { > BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); > -int64_t sectors_per_cluster = cluster_size_sectors(backup_job); > int64_t start, end; > > assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP); > > -start = sector_num / sectors_per_cluster; > -end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster); > +start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size); > +end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size); > wait_for_overlapping_requests(backup_job, start, end); > } > > void backup_cow_request_begin(CowRequest *req, BlockJob *job, > - int64_t sector_num, > - int nb_sectors) > + int64_t offset, uint64_t bytes) > { > BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); > -int64_t sectors_per_cluster = cluster_size_sectors(backup_job); > int64_t start, end; > > assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP); > > -start = sector_num /
Re: [Qemu-block] [PATCH 02/17] trace: Show blockjob actions via bytes, not sectors
On 04/17/2017 02:18 PM, John Snow wrote: >> @@ -346,13 +349,15 @@ static uint64_t coroutine_fn >> mirror_iteration(MirrorBlockJob *s) >> if (sector_num < 0) { >> bdrv_set_dirty_iter(s->dbi, 0); >> sector_num = bdrv_dirty_iter_next(s->dbi); >> -trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); >> +trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) * >> + BDRV_SECTOR_SIZE); > > mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64 > > I guess the print doesn't really bother to say what the unit is, just a > "dirty count." > > Does this conflict with the bitmap series, though? (Won't need to scale > by BDRV_SECTOR_SIZE after that, I think.) > That series depends on this one going in first (as currently written), and indeed, in that series, the code DOES simplify to drop the '* BDRV_SECTOR_SIZE' in the patch that changes the return scale of bdrv_get_dirty_count(). >> backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64 >> backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64 >> backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start >> %"PRId64" ret %d" > > I guess these three were "cluster" based, but you didn't need to change > anything to assert them as byte-based. > >> > > Under the assumption that it's okay to change tracing output without > being able to differentiate between new and old output: I'll leave that to Stefan's discretion as trace maintainer, but my thoughts are that tracing is for debugging purposes, and as long as you know what binary you are debugging, you can figure out what the trace message meant by referring back to the source that generated that binary. > > Reviewed-by: John Snow> -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH 01/17] blockjob: Track job ratelimits via bytes, not sectors
On 04/17/2017 02:18 PM, John Snow wrote: > > > On 04/11/2017 06:29 PM, Eric Blake wrote: >> The user interface specifies job rate limits in bytes/second. >> It's pointless to have our internal representation track things >> in sectors/second, particularly since we want to move away from >> sector-based interfaces. >> >> Fix up a doc typo found while verifying that the ratelimit >> code handles the scaling difference. >> >> Signed-off-by: Eric Blake>> --- >> +++ b/block/commit.c >> @@ -195,7 +195,8 @@ static void coroutine_fn commit_run(void *opaque) >> s->common.offset += n * BDRV_SECTOR_SIZE; >> >> if (copy && s->common.speed) { >> -delay_ns = ratelimit_calculate_delay(>limit, n); >> +delay_ns = ratelimit_calculate_delay(>limit, >> + n * BDRV_SECTOR_SIZE); > > You could probably factor out this calculation in conjunction with the > offset update above, but no matter. It gets simplified in a later patch, when I switch the entire function to track by bytes instead of sectors. >> +++ b/block/stream.c >> @@ -191,7 +191,8 @@ static void coroutine_fn stream_run(void *opaque) >> /* Publish progress */ >> s->common.offset += n * BDRV_SECTOR_SIZE; >> if (copy && s->common.speed) { >> -delay_ns = ratelimit_calculate_delay(>limit, n); >> +delay_ns = ratelimit_calculate_delay(>limit, >> + n * BDRV_SECTOR_SIZE); > > Same kind of comment here. And same response :) -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH 01/17] blockjob: Track job ratelimits via bytes, not sectors
On 04/11/2017 06:29 PM, Eric Blake wrote: > The user interface specifies job rate limits in bytes/second. > It's pointless to have our internal representation track things > in sectors/second, particularly since we want to move away from > sector-based interfaces. > > Fix up a doc typo found while verifying that the ratelimit > code handles the scaling difference. > > Signed-off-by: Eric Blake> --- > include/qemu/ratelimit.h | 3 ++- > block/backup.c | 5 +++-- > block/commit.c | 5 +++-- > block/mirror.c | 13 +++-- > block/stream.c | 5 +++-- > 5 files changed, 18 insertions(+), 13 deletions(-) > > diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h > index 8da1232..8dece48 100644 > --- a/include/qemu/ratelimit.h > +++ b/include/qemu/ratelimit.h > @@ -24,7 +24,8 @@ typedef struct { > > /** Calculate and return delay for next request in ns > * > - * Record that we sent @p n data units. If we may send more data units > + * Record that we sent @n data units (where @n matches the scale chosen > + * during ratelimit_set_speed). If we may send more data units > * in the current time slice, return 0 (i.e. no delay). Otherwise > * return the amount of time (in ns) until the start of the next time > * slice that will permit sending the next chunk of data. > diff --git a/block/backup.c b/block/backup.c > index a4fb288..bcfa321 100644 > --- a/block/backup.c > +++ b/block/backup.c > @@ -208,7 +208,7 @@ static void backup_set_speed(BlockJob *job, int64_t > speed, Error **errp) > error_setg(errp, QERR_INVALID_PARAMETER, "speed"); > return; > } > -ratelimit_set_speed(>limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); > +ratelimit_set_speed(>limit, speed, SLICE_TIME); > } > > static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) > @@ -359,7 +359,8 @@ static bool coroutine_fn yield_and_check(BackupBlockJob > *job) > */ > if (job->common.speed) { > uint64_t delay_ns = ratelimit_calculate_delay(>limit, > - job->sectors_read); > + job->sectors_read * > + BDRV_SECTOR_SIZE); > job->sectors_read = 0; > block_job_sleep_ns(>common, QEMU_CLOCK_REALTIME, delay_ns); > } else { > diff --git a/block/commit.c b/block/commit.c > index 76a0d98..8b684e0 100644 > --- a/block/commit.c > +++ b/block/commit.c > @@ -195,7 +195,8 @@ static void coroutine_fn commit_run(void *opaque) > s->common.offset += n * BDRV_SECTOR_SIZE; > > if (copy && s->common.speed) { > -delay_ns = ratelimit_calculate_delay(>limit, n); > +delay_ns = ratelimit_calculate_delay(>limit, > + n * BDRV_SECTOR_SIZE); You could probably factor out this calculation in conjunction with the offset update above, but no matter. > } > } > > @@ -217,7 +218,7 @@ static void commit_set_speed(BlockJob *job, int64_t > speed, Error **errp) > error_setg(errp, QERR_INVALID_PARAMETER, "speed"); > return; > } > -ratelimit_set_speed(>limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); > +ratelimit_set_speed(>limit, speed, SLICE_TIME); > } > > static const BlockJobDriver commit_job_driver = { > diff --git a/block/mirror.c b/block/mirror.c > index 2173a2f..a7d0960 100644 > --- a/block/mirror.c > +++ b/block/mirror.c > @@ -391,7 +391,8 @@ static uint64_t coroutine_fn > mirror_iteration(MirrorBlockJob *s) > bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, > nb_chunks); > while (nb_chunks > 0 && sector_num < end) { > int64_t ret; > -int io_sectors, io_sectors_acct; > +int io_sectors; > +int64_t io_bytes_acct; > BlockDriverState *file; > enum MirrorMethod { > MIRROR_METHOD_COPY, > @@ -439,16 +440,16 @@ static uint64_t coroutine_fn > mirror_iteration(MirrorBlockJob *s) > switch (mirror_method) { > case MIRROR_METHOD_COPY: > io_sectors = mirror_do_read(s, sector_num, io_sectors); > -io_sectors_acct = io_sectors; > +io_bytes_acct = io_sectors * BDRV_SECTOR_SIZE; > break; > case MIRROR_METHOD_ZERO: > case MIRROR_METHOD_DISCARD: > mirror_do_zero_or_discard(s, sector_num, io_sectors, >mirror_method == > MIRROR_METHOD_DISCARD); > if (write_zeroes_ok) { > -io_sectors_acct = 0; > +io_bytes_acct = 0; > } else { > -io_sectors_acct = io_sectors; > +io_bytes_acct = io_sectors * BDRV_SECTOR_SIZE; > } > break; > default: > @@ -458,7 +459,7 @@ static uint64_t coroutine_fn >
Re: [Qemu-block] [PATCH 02/17] trace: Show blockjob actions via bytes, not sectors
On 04/11/2017 06:29 PM, Eric Blake wrote: > Upcoming patches are going to switch to byte-based interfaces > instead of sector-based. Even worse, trace_backup_do_cow_enter() > had a weird mix of cluster and sector indices. Make the tracing > uniformly use bytes. > > Signed-off-by: Eric Blake> --- > block/backup.c | 16 ++-- > block/commit.c | 3 ++- > block/mirror.c | 26 +- > block/stream.c | 3 ++- > block/trace-events | 14 +++--- > 5 files changed, 38 insertions(+), 24 deletions(-) > > diff --git a/block/backup.c b/block/backup.c > index bcfa321..b28df8c 100644 > --- a/block/backup.c > +++ b/block/backup.c > @@ -102,6 +102,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, > void *bounce_buffer = NULL; > int ret = 0; > int64_t sectors_per_cluster = cluster_size_sectors(job); > +int64_t bytes_per_cluster = sectors_per_cluster * BDRV_SECTOR_SIZE; > int64_t start, end; > int n; > > @@ -110,18 +111,20 @@ static int coroutine_fn backup_do_cow(BackupBlockJob > *job, > start = sector_num / sectors_per_cluster; > end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster); > > -trace_backup_do_cow_enter(job, start, sector_num, nb_sectors); > +trace_backup_do_cow_enter(job, start * bytes_per_cluster, > + sector_num * BDRV_SECTOR_SIZE, > + nb_sectors * BDRV_SECTOR_SIZE); > > wait_for_overlapping_requests(job, start, end); > cow_request_begin(_request, job, start, end); > > for (; start < end; start++) { > if (test_bit(start, job->done_bitmap)) { > -trace_backup_do_cow_skip(job, start); > +trace_backup_do_cow_skip(job, start * bytes_per_cluster); > continue; /* already copied */ > } > > -trace_backup_do_cow_process(job, start); > +trace_backup_do_cow_process(job, start * bytes_per_cluster); > > n = MIN(sectors_per_cluster, > job->common.len / BDRV_SECTOR_SIZE - > @@ -138,7 +141,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, > bounce_qiov.size, _qiov, > is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0); > if (ret < 0) { > -trace_backup_do_cow_read_fail(job, start, ret); > +trace_backup_do_cow_read_fail(job, start * bytes_per_cluster, > ret); > if (error_is_read) { > *error_is_read = true; > } > @@ -154,7 +157,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, > job->compress ? BDRV_REQ_WRITE_COMPRESSED : > 0); > } > if (ret < 0) { > -trace_backup_do_cow_write_fail(job, start, ret); > +trace_backup_do_cow_write_fail(job, start * bytes_per_cluster, > ret); > if (error_is_read) { > *error_is_read = false; > } > @@ -177,7 +180,8 @@ out: > > cow_request_end(_request); > > -trace_backup_do_cow_return(job, sector_num, nb_sectors, ret); > +trace_backup_do_cow_return(job, sector_num * BDRV_SECTOR_SIZE, > + nb_sectors * BDRV_SECTOR_SIZE, ret); > > qemu_co_rwlock_unlock(>flush_rwlock); > > diff --git a/block/commit.c b/block/commit.c > index 8b684e0..11dab98 100644 > --- a/block/commit.c > +++ b/block/commit.c > @@ -176,7 +176,8 @@ static void coroutine_fn commit_run(void *opaque) >COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, >); > copy = (ret == 1); > -trace_commit_one_iteration(s, sector_num, n, ret); > +trace_commit_one_iteration(s, sector_num * BDRV_SECTOR_SIZE, > + n * BDRV_SECTOR_SIZE, ret); > if (copy) { > ret = commit_populate(s->top, s->base, sector_num, n, buf); > bytes_written += n * BDRV_SECTOR_SIZE; > diff --git a/block/mirror.c b/block/mirror.c > index a7d0960..d83d482 100644 > --- a/block/mirror.c > +++ b/block/mirror.c > @@ -103,7 +103,8 @@ static void mirror_iteration_done(MirrorOp *op, int ret) > int64_t chunk_num; > int i, nb_chunks, sectors_per_chunk; > > -trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret); > +trace_mirror_iteration_done(s, op->sector_num * BDRV_SECTOR_SIZE, > +op->nb_sectors * BDRV_SECTOR_SIZE, ret); > > s->in_flight--; > s->sectors_in_flight -= op->nb_sectors; > @@ -268,7 +269,8 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t > sector_num, > nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk); > > while (s->buf_free_count < nb_chunks) { > -trace_mirror_yield_in_flight(s, sector_num, s->in_flight); > +trace_mirror_yield_in_flight(s, sector_num *
Re: [Qemu-block] [Qemu-devel] [PATCH for-2.10 1/2] block: An empty filename counts as no filename
Hi Max, On 04/13/2017 01:06 PM, Max Reitz wrote: Reproducer: $ ./qemu-img info '' qemu-img: ./block.c:1008: bdrv_open_driver: Assertion `!drv->bdrv_needs_filename || bs->filename[0]' failed. [1]26105 abort (core dumped) ./qemu-img info '' This patch fixes this to be: $ ./qemu-img info '' qemu-img: Could not open '': The 'file' block driver requires a file name Cc: qemu-stableSigned-off-by: Max Reitz --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index 1fbbb8d606..46da908c93 100644 --- a/block.c +++ b/block.c @@ -1167,7 +1167,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, filename = qdict_get_try_str(options, "filename"); } -if (drv->bdrv_needs_filename && !filename) { +if (drv->bdrv_needs_filename && (!filename || !filename[0])) { What do you think about adding an inline function in "qemu/option.h" like "is_valid_[option_]filename()" to avoid this bug template? Anyway: Reviewed-by: Philippe Mathieu-Daudé error_setg(errp, "The '%s' block driver requires a file name", drv->format_name); ret = -EINVAL;
Re: [Qemu-block] [Qemu-devel] [PATCH v2 for-2.10 12/16] block/qcow2: Extract qcow2_calc_size_usage()
On 04/03/2017 01:09 PM, Max Reitz wrote: We will need very similar functionality for full/falloc preallocation in qcow2_truncate(). Although we will not be able to reuse much of the actual code, it still makes sense to keep all of this in one place. Signed-off-by: Max ReitzReviewed-by: Stefan Hajnoczi Reviewed-by: Philippe Mathieu-Daudé --- block/qcow2.c | 126 +++--- 1 file changed, 68 insertions(+), 58 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index bb0bd5561c..aafbc8dbed 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2108,6 +2108,70 @@ done: return ret; } +static uint64_t qcow2_calc_size_usage(uint64_t new_size, + int cluster_bits, int refcount_order) +{ +size_t cluster_size = 1u << cluster_bits; + +/* Note: The following calculation does not need to be exact; if it is a + * bit off, either some bytes will be "leaked" (which is fine) or we + * will need to increase the file size by some bytes (which is fine, + * too, as long as the bulk is allocated here). Therefore, using + * floating point arithmetic is fine. */ +int64_t meta_size = 0; +uint64_t nreftablee, nrefblocke, nl1e, nl2e; +uint64_t aligned_total_size = align_offset(new_size, cluster_size); +int refblock_bits, refblock_size; +/* refcount entry size in bytes */ +double rces = (1 << refcount_order) / 8.; + +/* see qcow2_open() */ +refblock_bits = cluster_bits - (refcount_order - 3); +refblock_size = 1 << refblock_bits; + +/* header: 1 cluster */ +meta_size += cluster_size; + +/* total size of L2 tables */ +nl2e = aligned_total_size / cluster_size; +nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t)); +meta_size += nl2e * sizeof(uint64_t); + +/* total size of L1 tables */ +nl1e = nl2e * sizeof(uint64_t) / cluster_size; +nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t)); +meta_size += nl1e * sizeof(uint64_t); + +/* total size of refcount blocks + * + * note: every host cluster is reference-counted, including metadata + * (even refcount blocks are recursively included). + * Let: + * a = total_size (this is the guest disk size) + * m = meta size not including refcount blocks and refcount tables + * c = cluster size + * y1 = number of refcount blocks entries + * y2 = meta size including everything + * rces = refcount entry size in bytes + * then, + * y1 = (y2 + a)/c + * y2 = y1 * rces + y1 * rces * sizeof(u64) / c + m + * we can get y1: + * y1 = (a + m) / (c - rces - rces * sizeof(u64) / c) + */ +nrefblocke = (aligned_total_size + meta_size + cluster_size) + / (cluster_size - rces - rces * sizeof(uint64_t) + / cluster_size); +meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size; + +/* total size of refcount tables */ +nreftablee = nrefblocke / refblock_size; +nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t)); +meta_size += nreftablee * sizeof(uint64_t); + +return aligned_total_size + meta_size; +} + static int qcow2_create2(const char *filename, int64_t total_size, const char *backing_file, const char *backing_format, int flags, size_t cluster_size, PreallocMode prealloc, @@ -2146,64 +2210,10 @@ static int qcow2_create2(const char *filename, int64_t total_size, int ret; if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { -/* Note: The following calculation does not need to be exact; if it is a - * bit off, either some bytes will be "leaked" (which is fine) or we - * will need to increase the file size by some bytes (which is fine, - * too, as long as the bulk is allocated here). Therefore, using - * floating point arithmetic is fine. */ -int64_t meta_size = 0; -uint64_t nreftablee, nrefblocke, nl1e, nl2e; -int64_t aligned_total_size = align_offset(total_size, cluster_size); -int refblock_bits, refblock_size; -/* refcount entry size in bytes */ -double rces = (1 << refcount_order) / 8.; - -/* see qcow2_open() */ -refblock_bits = cluster_bits - (refcount_order - 3); -refblock_size = 1 << refblock_bits; - -/* header: 1 cluster */ -meta_size += cluster_size; - -/* total size of L2 tables */ -nl2e = aligned_total_size / cluster_size; -nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t)); -meta_size += nl2e * sizeof(uint64_t); - -/* total size of L1 tables */ -nl1e = nl2e * sizeof(uint64_t) / cluster_size; -nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t)); -meta_size
Re: [Qemu-block] [Qemu-devel] [PATCH for-2.9-rc5 v2] block: Drain BH in bdrv_drained_begin
On Mon, Apr 17, 2017 at 04:27:19PM +0800, Fam Zheng wrote: > On Fri, 04/14 09:51, Stefan Hajnoczi wrote: > > On Fri, Apr 14, 2017 at 9:02 AM, Fam Zhengwrote: > > > @@ -398,11 +399,15 @@ void bdrv_drain_all(void); > > > */\ > > > assert(!bs_->wakeup); \ > > > bs_->wakeup = true;\ > > > -while ((cond)) { \ > > > -aio_context_release(ctx_); \ > > > -aio_poll(qemu_get_aio_context(), true);\ > > > -aio_context_acquire(ctx_); \ > > > -waited_ = true;\ > > > +while (busy_) {\ > > > +if ((cond)) { \ > > > +waited_ = busy_ = true;\ > > > +aio_context_release(ctx_); \ > > > +aio_poll(qemu_get_aio_context(), true);\ > > > +aio_context_acquire(ctx_); \ > > > +} else { \ > > > +busy_ = aio_poll(ctx_, false); \ > > > +} \ > > > > Wait, I'm confused. The current thread is not in the BDS AioContext. > > We're not allowed to call aio_poll(ctx_, false). > > > > Did you mean aio_poll(qemu_get_aio_context(), false) in order to > > process defer to main loop BHs? > > At this point it's even unclear to me what should be the plan for 2.9. v1 IMO > was the least intrusive, but didn't cover bdrv_drain_all_begin. v2 has this > controversial "aio_poll(ctx_, false)", however its alternative, > "aio_poll(qemu_get_aio_context(), false)", "introduces" another crash that is > not seen otherwise. > > What should we do now? > > Fam > I think the priority is fixing the regression, which v1 does. Is there a regression lurking in the bdrv_drain_all() path? I've not been able to find one. If there is no regressive path through bdrv_drain_all(), my vote would be the simplest, least intrusive patch that fixes the current regression, and I think that is v1. Then we can fix everything correctly, and more comprehensively, for 2.10. -Jeff
Re: [Qemu-block] [PATCH for-2.9-rc5 v2] block: Drain BH in bdrv_drained_begin
On Fri, 04/14 09:51, Stefan Hajnoczi wrote: > On Fri, Apr 14, 2017 at 9:02 AM, Fam Zhengwrote: > > @@ -398,11 +399,15 @@ void bdrv_drain_all(void); > > */\ > > assert(!bs_->wakeup); \ > > bs_->wakeup = true;\ > > -while ((cond)) { \ > > -aio_context_release(ctx_); \ > > -aio_poll(qemu_get_aio_context(), true);\ > > -aio_context_acquire(ctx_); \ > > -waited_ = true;\ > > +while (busy_) {\ > > +if ((cond)) { \ > > +waited_ = busy_ = true;\ > > +aio_context_release(ctx_); \ > > +aio_poll(qemu_get_aio_context(), true);\ > > +aio_context_acquire(ctx_); \ > > +} else { \ > > +busy_ = aio_poll(ctx_, false); \ > > +} \ > > Wait, I'm confused. The current thread is not in the BDS AioContext. > We're not allowed to call aio_poll(ctx_, false). > > Did you mean aio_poll(qemu_get_aio_context(), false) in order to > process defer to main loop BHs? At this point it's even unclear to me what should be the plan for 2.9. v1 IMO was the least intrusive, but didn't cover bdrv_drain_all_begin. v2 has this controversial "aio_poll(ctx_, false)", however its alternative, "aio_poll(qemu_get_aio_context(), false)", "introduces" another crash that is not seen otherwise. What should we do now? Fam
Re: [Qemu-block] [Qemu-devel] [PATCH for-2.10 0/2] block: An empty filename counts as no filename
On Thu, 04/13 18:06, Max Reitz wrote: > See patch 1 for what is happening when you try "qemu-img info ''" and > for the fix. > > (Patch 2 just adds a simple test.) Reviewed-by: Fam Zheng
Re: [Qemu-block] [Qemu-devel] [PATCH v4 2/6] replication: add shared-disk and shared-disk-id options
On 2017/4/12 22:28, Eric Blake wrote: On 04/12/2017 09:05 AM, zhanghailiang wrote: We use these two options to identify which disk is shared Signed-off-by: zhanghailiangSigned-off-by: Wen Congyang Signed-off-by: Zhang Chen --- v4: - Add proper comment for primary_disk (Stefan) v2: - Move g_free(s->shared_disk_id) to the common fail process place (Stefan) - Fix comments for these two options --- +++ b/qapi/block-core.json @@ -2661,12 +2661,20 @@ # node who owns the replication node chain. Must not be given in # primary mode. # +# @shared-disk-id: Id of shared disk while is replication mode, if @shared-disk +# is true, this option is required (Since: 2.10) +# +# @shared-disk: To indicate whether or not a disk is shared by primary VM +# and secondary VM. (The default is false) (Since: 2.10) +# # Since: 2.9 ## { 'struct': 'BlockdevOptionsReplication', 'base': 'BlockdevOptionsGenericFormat', 'data': { 'mode': 'ReplicationMode', -'*top-id': 'str' } } +'*top-id': 'str', +'*shared-disk-id': 'str', +'*shared-disk': 'bool' } } Do we need a separate bool and string? Or is it sufficient to say that if shared-disk is omitted, we are not sharing, and that if a shared-disk string is present, then we are sharing and it names the id of the shared disk. Er, Yes, We need both of them, the command line of secondary sides is like: -drive if=virtio,id=active-disk0,driver=replication,mode=secondary,\ file.driver=qcow2,top-id=active-disk0,\ file.file.filename=/mnt/ramfs/active_disk.img,\ file.backing=hidden_disk0,shared-disk=on We only need the bool shared-disk to indicate whether disk is sharing or not, but for primary side, we need to the blockdev-add command to tell primary which disk is shared. { 'execute': 'blockdev-add', 'arguments': { 'driver': 'replication', 'node-name': 'rep', 'mode': 'primary', 'shared-disk-id': 'primary_disk0', 'shared-disk': true, 'file': { 'driver': 'nbd', 'export': 'hidden_disk0', 'server': { 'type': 'inet', 'data': { 'host': 'xxx.xxx.xxx.xxx', 'port': 'yyy' } } } } }