PATCH v5 contains the following changes:
1. Make sure the version of the VHD is greater than 1.2, to be compatible with VHDs from vhd-util, because of tdbatmap. 2. Add support for w2ru. 3. Do not support backing file chains yet, because that would need one more bdrv_pread, which would further hurt the already weak performance. On Tue, Sep 23, 2014 at 10:25 PM, Xiaodong Gong <gordongong0...@gmail.com> wrote: > This structure is not documented in the official VHD specs, nor is the > 'tdbatmap' > cookie used in differencing VHDs created by Windows Server 2012. Isn't > Hyper-V > compatibility desired? > > Yes, this tdbatmap structure is an extra feature from Xen's vhd-util; here we > could make sure the version of the VHD is greater than 1.2. I'll add it in v5. > > Aren't those checks required no matter what platform specific locator is > used? > > Why not? IMO, w2ru (relative paths) support MUST be added before merging > this > feature. > > I think the licence of win2008 or others is a problem for me. It is hard to > test it in > my environment. Maybe a co-author of this patch is needed, do you? > > What if we have a chain of differencing VHD images and the immediate parent > does not have the required sector either, which may be at an upper level? > Shouldn't this method be called recursively in this case? > > Exactly, vpc_open and vpc_read should both support a chain of > snapshots. > But without a cache for the BAT and the bitmap, snapshot performance > is already > really hard to accept. So support for a chain is better added after > we have a cache, > I think. > > On Mon, Sep 22, 2014 at 6:26 PM, Lucian Petrut <petrutlucia...@gmail.com> > wrote: > >> From: =?ISO-8859-1?B?MjFH?= <gordongong0...@gmail.com> >> >> Now qemu only supports vhd type VHD_FIXED and VHD_DYNAMIC, >> so qemu can't read snapshot volume of vhd, and can't support >> other storage features of vhd file. >> >> This patch add read parent information in function "vpc_open", >> read bitmap in "vpc_read", and change bitmap in "vpc_write". 
>> >> Signed-off-by: Xiaodong Gong <gordongong0...@gmail.com> >> --- >> > Now qemu only supports vhd type VHD_FIXED and VHD_DYNAMIC, >> > so qemu can't read snapshot volume of vhd, and can't support >> > other storage features of vhd file. >> > >> > This patch add read parent information in function "vpc_open", >> > read bitmap in "vpc_read", and change bitmap in "vpc_write". >> > >> > Signed-off-by: Xiaodong Gong <pri...@gmail.com> >> >> > --- >> > block/vpc.c | 355 >> ++++++++++++++++++++++++++++++++++++---------- >> > include/block/block_int.h | 6 +- >> > 2 files changed, 288 insertions(+), 73 deletions(-) >> > >> > diff --git a/block/vpc.c b/block/vpc.c >> > index c024b4c..2ff2bba 100644 >> > --- a/block/vpc.c >> > +++ b/block/vpc.c >> > @@ -33,13 +33,18 @@ >> > /**************************************************************/ >> > >> > #define HEADER_SIZE 512 >> > +#define DYNAMIC_HEADER_SIZE 1024 >> > +#define PARENT_LOCATOR_NUM 8 >> > +#define PARENT_PREFIX_LEN 7 /* such as file:// */ >> > +#define TBBATMAP_HEAD_SIZE 28 >> > +#define PLATFORM_MACX 0x5863614d /* big endian */ >> > >> > //#define CACHE >> > >> > enum vhd_type { >> > VHD_FIXED = 2, >> > VHD_DYNAMIC = 3, >> > - VHD_DIFFERENCING = 4, >> > + VHD_DIFF = 4, >> > }; >> > >> > // Seconds since Jan 1, 2000 0:00:00 (UTC) >> > @@ -138,6 +143,15 @@ typedef struct BDRVVPCState { >> > Error *migration_blocker; >> > } BDRVVPCState; >> > >> > +typedef struct vhd_tdbatmap_header { >> > + char magic[8]; /* always "tdbatmap" */ >> > + >> > + uint64_t batmap_offset; >> > + uint32_t batmap_size; >> > + uint32_t batmap_version; >> > + uint32_t checksum; >> > +} QEMU_PACKED VHDTdBatmapHeader; >> > + >> >> This structure is not documented in the official VHD specs, nor is the >> 'tdbatmap' >> cookie used in differencing VHDs created by Windows Server 2012. Isn't >> Hyper-V >> compatibility desired? 
>> >> > static uint32_t vpc_checksum(uint8_t* buf, size_t size) >> > { >> > uint32_t res = 0; >> > @@ -153,7 +167,7 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t >> size) >> > static int vpc_probe(const uint8_t *buf, int buf_size, const char >> *filename) >> > { >> > if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8)) >> > - return 100; >> > + return 100; >> > return 0; >> > } >> > >> > @@ -164,11 +178,17 @@ static int vpc_open(BlockDriverState *bs, QDict >> *options, int flags, >> > int i; >> > VHDFooter *footer; >> > VHDDynDiskHeader *dyndisk_header; >> > - uint8_t buf[HEADER_SIZE]; >> > + uint8_t buf[DYNAMIC_HEADER_SIZE]; >> > + uint8_t tdbatmap_header_buf[TBBATMAP_HEAD_SIZE]; >> > uint32_t checksum; >> > uint64_t computed_size; >> > - int disk_type = VHD_DYNAMIC; >> > + uint32_t disk_type; >> > int ret; >> > + VHDTdBatmapHeader *tdbatmap_header; >> > + int parent_locator_offset = 0; >> > + int64_t data_offset = 0; >> > + int data_length = 0; >> > + uint32_t platform; >> > >> > ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE); >> > if (ret < 0) { >> > @@ -176,6 +196,8 @@ static int vpc_open(BlockDriverState *bs, QDict >> *options, int flags, >> > } >> > >> > footer = (VHDFooter *) s->footer_buf; >> > + disk_type = be32_to_cpu(footer->type); >> > + >> > if (strncmp(footer->creator, "conectix", 8)) { >> > int64_t offset = bdrv_getlength(bs->file); >> > if (offset < 0) { >> > @@ -230,9 +252,9 @@ static int vpc_open(BlockDriverState *bs, QDict >> *options, int flags, >> > goto fail; >> > } >> > >> > - if (disk_type == VHD_DYNAMIC) { >> > + if (disk_type == VHD_DYNAMIC || disk_type == VHD_DIFF) { >> > ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), >> buf, >> > - HEADER_SIZE); >> > + DYNAMIC_HEADER_SIZE); >> > if (ret < 0) { >> > goto fail; >> > } >> > @@ -286,6 +308,63 @@ static int vpc_open(BlockDriverState *bs, QDict >> *options, int flags, >> > s->free_data_block_offset = >> > (s->bat_offset + (s->max_table_entries * 4) + 511) 
& ~511; >> > >> > + /* Read tdbatmap header by offset */ >> > + ret = bdrv_pread(bs->file, s->free_data_block_offset, >> > + tdbatmap_header_buf, TBBATMAP_HEAD_SIZE); >> > + if (ret < 0) { >> > + goto fail; >> > + } >> > + >> > + tdbatmap_header = (VHDTdBatmapHeader *) tdbatmap_header_buf; >> > + if (!strncmp(tdbatmap_header->magic, "tdbatmap", 8)) { >> > + s->free_data_block_offset = >> > + be32_to_cpu(tdbatmap_header->batmap_size) * 512 >> > + + be64_to_cpu(tdbatmap_header->batmap_offset); >> > + } >> > + >> > + /* Read backing file location from dyn header table */ >> > + if (dyndisk_header->parent_name[0] || >> dyndisk_header->parent_name[1]) { >> > + for (i = 0; i < PARENT_LOCATOR_NUM; i++) { >> > + data_offset = >> > + >> be64_to_cpu(dyndisk_header->parent_locator[i].data_offset); >> > + data_length = >> > + >> be32_to_cpu(dyndisk_header->parent_locator[i].data_length); >> > + platform = dyndisk_header->parent_locator[i].platform; >> > + >> > + if (platform == PLATFORM_MACX) { >> >> Aren't those checks required no matter what platform specific locator is >> used? >> >> > + if (data_offset + PARENT_PREFIX_LEN > >> > + s->max_table_entries * s->block_size) { >> > + goto fail; >> > + } >> > + if (data_length - PARENT_PREFIX_LEN > >> PARENT_MAX_LOCATOR) { >> > + goto fail; >> > + } >> > + ret = bdrv_pread(bs->file, data_offset + >> PARENT_PREFIX_LEN, >> > + bs->backing_file, data_length - >> PARENT_PREFIX_LEN); >> > + if (ret < 0) { >> > + goto fail; >> > + } >> > + >> > + bs->backing_file[data_length - PARENT_PREFIX_LEN] >> = '\0'; >> > + } >> > + >> > + if (data_offset > parent_locator_offset) { >> > + parent_locator_offset = data_offset; >> > + } >> > + } >> > + >> > + if (strlen(bs->backing_file) == 0) { >> > + error_setg(errp, "block-vpc: differencing is not >> support in" >> > + "w2ru or w2ku"); >> >> Why not? IMO, w2ru (relative paths) support MUST be added before merging >> this >> feature. 
>> >> > + ret = -EINVAL; >> > + goto fail; >> > + } >> > + } >> > + >> > + if (parent_locator_offset + 512 > s->free_data_block_offset) { >> > + s->free_data_block_offset = parent_locator_offset + 512; >> > + } >> > + >> > for (i = 0; i < s->max_table_entries; i++) { >> > be32_to_cpus(&s->pagetable[i]); >> > if (s->pagetable[i] != 0xFFFFFFFF) { >> > @@ -340,35 +419,76 @@ static int vpc_reopen_prepare(BDRVReopenState >> *state, >> > } >> > >> > /* >> > - * Returns the absolute byte offset of the given sector in the image >> file. >> > - * If the sector is not allocated, -1 is returned instead. >> > + * Returns the absolute byte offset of the given sector in the >> differencing >> > + * image file. >> > * >> > - * The parameter write must be 1 if the offset will be used for a write >> > - * operation (the block bitmaps is updated then), 0 otherwise. >> > + * If error happened, -1 is returned. >> > + * >> > + * When write all type or read dynamic, if the sector is not >> allocated, -2 >> > + * is returned instead. If the sector is allocated in current file, >> the block >> > + * offset is returned. >> > + * >> > + * When read diff. If the sector is not allocated, -2 is returned >> instead. >> > + * If the sector is allocated in the backing file, -3 is returned. If >> the >> > + * sector is allocated in current file, the block offset is returned. 
>> > */ >> > static inline int64_t get_sector_offset(BlockDriverState *bs, >> > - int64_t sector_num, int write) >> > + int64_t sector_num, bool write, bool diff) >> > { >> > BDRVVPCState *s = bs->opaque; >> > - uint64_t offset = sector_num * 512; >> > - uint64_t bitmap_offset, block_offset; >> > + uint64_t offset = sector_num << BDRV_SECTOR_BITS; >> > + uint64_t bitmap_offset; >> > uint32_t pagetable_index, pageentry_index; >> > + int64_t block_offset = LONG_MIN; >> > + int ret; >> > >> > pagetable_index = offset / s->block_size; >> > - pageentry_index = (offset % s->block_size) / 512; >> > + pageentry_index = (offset % s->block_size) >> BDRV_SECTOR_BITS; >> > >> > - if (pagetable_index >= s->max_table_entries || >> s->pagetable[pagetable_index] == 0xffffffff) >> > - return -1; // not allocated >> > + if (pagetable_index >= s->max_table_entries) { >> > + return -2; >> > + } >> > + if (s->pagetable[pagetable_index] == 0xffffffff) { >> > + if (!write && diff) { >> > + return -3; /* parent allocated */ >> > + } else { >> > + return -2; /* not allocated */ >> > + } >> > + } >> > >> > - bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index]; >> > - block_offset = bitmap_offset + s->bitmap_size + (512 * >> pageentry_index); >> > + bitmap_offset = (uint64_t) s->pagetable[pagetable_index] >> > + << BDRV_SECTOR_BITS; >> > + >> > + if (!diff || write) { >> > + block_offset = bitmap_offset + s->bitmap_size >> > + + (pageentry_index << BDRV_SECTOR_BITS); >> > + } else { >> > + uint32_t bitmap_index, bitmapentry_index; >> > + uint8_t bitmap[s->bitmap_size]; >> > + >> > + if (bitmap_offset > s->max_table_entries * s->block_size) { >> > + return -1; >> > + } >> > + ret = bdrv_pread(bs->file, bitmap_offset, bitmap, >> s->bitmap_size); >> > + if (ret < 0) { >> > + return -1; >> > + } >> > >> > + bitmap_index = pageentry_index / 8; >> > + bitmapentry_index = 7 - pageentry_index % 8; >> > + if (bitmap[bitmap_index] & 0x1 << bitmapentry_index) { >> > + block_offset = 
bitmap_offset + s->bitmap_size >> > + + (pageentry_index << BDRV_SECTOR_BITS); >> > + } else { >> > + return -3; >> > + } >> > + } >> > // We must ensure that we don't write to any sectors which are >> marked as >> > // unused in the bitmap. We get away with setting all bits in the >> block >> > // bitmap each time we write to a new block. This might cause >> Virtual PC to >> > // miss sparse read optimization, but it's not a problem in terms >> of >> > // correctness. >> > - if (write && (s->last_bitmap_offset != bitmap_offset)) { >> > + if (!diff && write && (s->last_bitmap_offset != bitmap_offset)) { >> > uint8_t bitmap[s->bitmap_size]; >> > >> > s->last_bitmap_offset = bitmap_offset; >> > @@ -376,7 +496,7 @@ static inline int64_t >> get_sector_offset(BlockDriverState *bs, >> > bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, >> s->bitmap_size); >> > } >> > >> > -// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" >> PRIx64 ", bloff: %" PRIx64 "\n", >> > +// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" >> PRIx64 ", bloff: %" PRIx64 "\n", >> > // sector_num, pagetable_index, pageentry_index, >> > // bitmap_offset, block_offset); >> > >> > @@ -437,7 +557,8 @@ static int rewrite_footer(BlockDriverState* bs) >> > * >> > * Returns the sectors' offset in the image file on success and < 0 on >> error >> > */ >> > -static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num) >> > +static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num, >> > + bool diff) >> > { >> > BDRVVPCState *s = bs->opaque; >> > int64_t bat_offset; >> > @@ -457,7 +578,11 @@ static int64_t alloc_block(BlockDriverState* bs, >> int64_t sector_num) >> > s->pagetable[index] = s->free_data_block_offset / 512; >> > >> > // Initialize the block's bitmap >> > - memset(bitmap, 0xff, s->bitmap_size); >> > + if (diff) { >> > + memset(bitmap, 0x0, s->bitmap_size); >> > + } else { >> > + memset(bitmap, 0xff, s->bitmap_size); >> > + } >> > ret = 
bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap, >> > s->bitmap_size); >> > if (ret < 0) { >> > @@ -477,7 +602,7 @@ static int64_t alloc_block(BlockDriverState* bs, >> int64_t sector_num) >> > if (ret < 0) >> > goto fail; >> > >> > - return get_sector_offset(bs, sector_num, 0); >> > + return get_sector_offset(bs, sector_num, false, diff); >> > >> > fail: >> > s->free_data_block_offset -= (s->block_size + s->bitmap_size); >> > @@ -501,36 +626,66 @@ static int vpc_read(BlockDriverState *bs, int64_t >> sector_num, >> > uint8_t *buf, int nb_sectors) >> > { >> > BDRVVPCState *s = bs->opaque; >> > - int ret; >> > - int64_t offset; >> > - int64_t sectors, sectors_per_block; >> > VHDFooter *footer = (VHDFooter *) s->footer_buf; >> > + int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS; >> > + int64_t offset, sectors; >> > + int ret; >> > >> > - if (be32_to_cpu(footer->type) == VHD_FIXED) { >> > + switch (be32_to_cpu(footer->type)) { >> > + case VHD_FIXED: >> > return bdrv_read(bs->file, sector_num, buf, nb_sectors); >> > - } >> > - while (nb_sectors > 0) { >> > - offset = get_sector_offset(bs, sector_num, 0); >> > - >> > - sectors_per_block = s->block_size >> BDRV_SECTOR_BITS; >> > - sectors = sectors_per_block - (sector_num % sectors_per_block); >> > - if (sectors > nb_sectors) { >> > - sectors = nb_sectors; >> > - } >> > + case VHD_DYNAMIC: >> > + while (nb_sectors > 0) { >> > + sectors = sectors_per_block - (sector_num % >> sectors_per_block); >> > + if (sectors > nb_sectors) { >> > + sectors = nb_sectors; >> > + } >> > >> > - if (offset == -1) { >> > - memset(buf, 0, sectors * BDRV_SECTOR_SIZE); >> > - } else { >> > - ret = bdrv_pread(bs->file, offset, buf, >> > - sectors * BDRV_SECTOR_SIZE); >> > - if (ret != sectors * BDRV_SECTOR_SIZE) { >> > + offset = get_sector_offset(bs, sector_num, false, false); >> > + if (offset == -1) { >> > return -1; >> > + } else if (offset == -2) { >> > + memset(buf, 0, sectors * BDRV_SECTOR_SIZE); >> > + } else { 
>> > + ret = bdrv_pread(bs->file, offset, buf, >> > + sectors * BDRV_SECTOR_SIZE); >> > + if (ret != sectors * BDRV_SECTOR_SIZE) { >> > + return -1; >> > + } >> > } >> > + >> > + nb_sectors -= sectors; >> > + sector_num += sectors; >> > + buf += sectors * BDRV_SECTOR_SIZE; >> > } >> > + break; >> > + case VHD_DIFF: >> > + while (nb_sectors > 0) { >> > + offset = get_sector_offset(bs, sector_num, false, true); >> > + if (offset == -1) { >> > + return -1; >> > + } else if (offset == -2) { >> > + memset(buf, 0, BDRV_SECTOR_SIZE); >> > + } else if (offset == -3) { >> > + ret = bdrv_pread(bs->backing_hd, sector_num << >> BDRV_SECTOR_BITS >> > + , buf, BDRV_SECTOR_SIZE); >> >> What if we have a chain of differencing VHD images and the immediate >> parent >> does not have the required sector either, which may be at an upper level? >> Shouldn't this method be called recursively in this case? >> >> > + if (ret < 0) { >> > + return -1; >> > + } >> > + } else { >> > + ret = bdrv_pread(bs->file, offset, buf, >> BDRV_SECTOR_SIZE); >> > + if (ret != BDRV_SECTOR_SIZE) { >> > + return -1; >> > + } >> > + } >> > >> > - nb_sectors -= sectors; >> > - sector_num += sectors; >> > - buf += sectors * BDRV_SECTOR_SIZE; >> > + nb_sectors--; >> > + sector_num++; >> > + buf += BDRV_SECTOR_SIZE; >> > + } >> > + break; >> > + default: >> > + return -1; >> > } >> > return 0; >> > } >> > @@ -546,44 +701,101 @@ static coroutine_fn int >> vpc_co_read(BlockDriverState *bs, int64_t sector_num, >> > return ret; >> > } >> > >> > -static int vpc_write(BlockDriverState *bs, int64_t sector_num, >> > - const uint8_t *buf, int nb_sectors) >> > +static inline int64_t write_bitmap(BlockDriverState *bs, int64_t >> sector_num, >> > + int64_t sectors) >> > { >> > BDRVVPCState *s = bs->opaque; >> > - int64_t offset; >> > - int64_t sectors, sectors_per_block; >> > + uint64_t offset = sector_num << BDRV_SECTOR_BITS; >> > + uint64_t bitmap_offset; >> > + uint32_t pagetable_index, pageentry_index; >> > + uint8_t 
bitmap[s->bitmap_size]; >> > + uint32_t bitmap_index, bitmapbit_index; >> > + int i; >> > int ret; >> > - VHDFooter *footer = (VHDFooter *) s->footer_buf; >> > >> > - if (be32_to_cpu(footer->type) == VHD_FIXED) { >> > - return bdrv_write(bs->file, sector_num, buf, nb_sectors); >> > + pagetable_index = offset / s->block_size; >> > + pageentry_index = (offset % s->block_size) / 512; >> > + bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index]; >> > + >> > + if (bitmap_offset > s->max_table_entries * s->block_size) { >> > + return -1; >> > + } >> > + ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size); >> > + if (ret < 0) { >> > + return -1; >> > } >> > - while (nb_sectors > 0) { >> > - offset = get_sector_offset(bs, sector_num, 1); >> > >> > - sectors_per_block = s->block_size >> BDRV_SECTOR_BITS; >> > - sectors = sectors_per_block - (sector_num % sectors_per_block); >> > - if (sectors > nb_sectors) { >> > - sectors = nb_sectors; >> > + for (i = 0; i < sectors; i++) { >> > + bitmap_index = pageentry_index / 8; >> > + bitmapbit_index = 7 - pageentry_index % 8; >> > + bitmap[bitmap_index] |= (0x1 << bitmapbit_index); >> > + pageentry_index++; >> > + } >> > + ret = bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size); >> > + if (ret < 0) { >> > + return -1; >> > + } >> > + >> > + return 0; >> > +} >> > + >> > +static int vpc_write(BlockDriverState *bs, int64_t sector_num, >> > + const uint8_t *buf, int nb_sectors) >> > +{ >> > + BDRVVPCState *s = bs->opaque; >> > + VHDFooter *footer = (VHDFooter *) s->footer_buf; >> > + int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS; >> > + int64_t offset, sectors; >> > + bool diff = true; >> > + int ret = 0; >> > + >> > + switch (be32_to_cpu(footer->type)) { >> > + case VHD_FIXED: >> > + return bdrv_write(bs->file, sector_num, buf, nb_sectors); >> > + case VHD_DYNAMIC: >> > + case VHD_DIFF: >> > + if (be32_to_cpu(footer->type) == VHD_DYNAMIC) { >> > + diff = false; >> > } >> > >> > - if 
(offset == -1) { >> > - offset = alloc_block(bs, sector_num); >> > - if (offset < 0) >> > + while (nb_sectors > 0) { >> > + sectors = sectors_per_block - (sector_num % >> sectors_per_block); >> > + if (sectors > nb_sectors) { >> > + sectors = nb_sectors; >> > + } >> > + >> > + offset = get_sector_offset(bs, sector_num, true, diff); >> > + if (offset == -1) { >> > return -1; >> > - } >> > + } else if (offset == -2) { >> > + offset = alloc_block(bs, sector_num, diff); >> > + if (offset < 0) { >> > + return -1; >> > + } >> > + } >> > >> > - ret = bdrv_pwrite(bs->file, offset, buf, sectors * >> BDRV_SECTOR_SIZE); >> > - if (ret != sectors * BDRV_SECTOR_SIZE) { >> > - return -1; >> > - } >> > + ret = bdrv_pwrite(bs->file, offset, buf, >> > + sectors * BDRV_SECTOR_SIZE); >> > + if (ret != sectors * BDRV_SECTOR_SIZE) { >> > + return -1; >> > + } >> > >> > - nb_sectors -= sectors; >> > - sector_num += sectors; >> > - buf += sectors * BDRV_SECTOR_SIZE; >> > - } >> > + if (diff) { >> > + ret = write_bitmap(bs, sector_num, sectors); >> > + if (ret < 0) { >> > + return -1; >> > + } >> > + } >> > >> > - return 0; >> > + nb_sectors -= sectors; >> > + sector_num += sectors; >> > + buf += sectors * BDRV_SECTOR_SIZE; >> > + } >> > + break; >> > + default: >> > + return -1; >> > + } >> > + return ret; >> > } >> > >> > static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t >> sector_num, >> > @@ -910,6 +1122,7 @@ static BlockDriver bdrv_vpc = { >> > .bdrv_close = vpc_close, >> > .bdrv_reopen_prepare = vpc_reopen_prepare, >> > .bdrv_create = vpc_create, >> > + .supports_backing = true, >> > >> > .bdrv_read = vpc_co_read, >> > .bdrv_write = vpc_co_write, >> > diff --git a/include/block/block_int.h b/include/block/block_int.h >> > index 8a61215..aab3ae8 100644 >> > --- a/include/block/block_int.h >> > +++ b/include/block/block_int.h >> > @@ -335,8 +335,10 @@ struct BlockDriverState { >> > QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; >> > >> > char filename[1024]; >> > - char 
backing_file[1024]; /* if non zero, the image is a diff of >> > - this file image */ >> > + >> > +#define PARENT_MAX_LOCATOR 512 >> > + char backing_file[PARENT_MAX_LOCATOR]; /* if non zero, the is a >> diff >> > + of this file image */ >> > char backing_format[16]; /* if non-zero and backing_file exists */ >> > >> > QDict *full_open_options; >> >> block/vpc.c | 355 >> ++++++++++++++++++++++++++++++++++++---------- >> include/block/block_int.h | 6 +- >> 2 files changed, 288 insertions(+), 73 deletions(-) >> >> diff --git a/block/vpc.c b/block/vpc.c >> index c024b4c..2ff2bba 100644 >> --- a/block/vpc.c >> +++ b/block/vpc.c >> @@ -33,13 +33,18 @@ >> /**************************************************************/ >> >> #define HEADER_SIZE 512 >> +#define DYNAMIC_HEADER_SIZE 1024 >> +#define PARENT_LOCATOR_NUM 8 >> +#define PARENT_PREFIX_LEN 7 /* such as file:// */ >> +#define TBBATMAP_HEAD_SIZE 28 >> +#define PLATFORM_MACX 0x5863614d /* big endian */ >> >> //#define CACHE >> >> enum vhd_type { >> VHD_FIXED = 2, >> VHD_DYNAMIC = 3, >> - VHD_DIFFERENCING = 4, >> + VHD_DIFF = 4, >> }; >> >> // Seconds since Jan 1, 2000 0:00:00 (UTC) >> @@ -138,6 +143,15 @@ typedef struct BDRVVPCState { >> Error *migration_blocker; >> } BDRVVPCState; >> >> +typedef struct vhd_tdbatmap_header { >> + char magic[8]; /* always "tdbatmap" */ >> + >> + uint64_t batmap_offset; >> + uint32_t batmap_size; >> + uint32_t batmap_version; >> + uint32_t checksum; >> +} QEMU_PACKED VHDTdBatmapHeader; >> + >> static uint32_t vpc_checksum(uint8_t* buf, size_t size) >> { >> uint32_t res = 0; >> @@ -153,7 +167,7 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t >> size) >> static int vpc_probe(const uint8_t *buf, int buf_size, const char >> *filename) >> { >> if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8)) >> - return 100; >> + return 100; >> return 0; >> } >> >> @@ -164,11 +178,17 @@ static int vpc_open(BlockDriverState *bs, QDict >> *options, int flags, >> int i; >> VHDFooter *footer; >> 
VHDDynDiskHeader *dyndisk_header; >> - uint8_t buf[HEADER_SIZE]; >> + uint8_t buf[DYNAMIC_HEADER_SIZE]; >> + uint8_t tdbatmap_header_buf[TBBATMAP_HEAD_SIZE]; >> uint32_t checksum; >> uint64_t computed_size; >> - int disk_type = VHD_DYNAMIC; >> + uint32_t disk_type; >> int ret; >> + VHDTdBatmapHeader *tdbatmap_header; >> + int parent_locator_offset = 0; >> + int64_t data_offset = 0; >> + int data_length = 0; >> + uint32_t platform; >> >> ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE); >> if (ret < 0) { >> @@ -176,6 +196,8 @@ static int vpc_open(BlockDriverState *bs, QDict >> *options, int flags, >> } >> >> footer = (VHDFooter *) s->footer_buf; >> + disk_type = be32_to_cpu(footer->type); >> + >> if (strncmp(footer->creator, "conectix", 8)) { >> int64_t offset = bdrv_getlength(bs->file); >> if (offset < 0) { >> @@ -230,9 +252,9 @@ static int vpc_open(BlockDriverState *bs, QDict >> *options, int flags, >> goto fail; >> } >> >> - if (disk_type == VHD_DYNAMIC) { >> + if (disk_type == VHD_DYNAMIC || disk_type == VHD_DIFF) { >> ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, >> - HEADER_SIZE); >> + DYNAMIC_HEADER_SIZE); >> if (ret < 0) { >> goto fail; >> } >> @@ -286,6 +308,63 @@ static int vpc_open(BlockDriverState *bs, QDict >> *options, int flags, >> s->free_data_block_offset = >> (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511; >> >> + /* Read tdbatmap header by offset */ >> + ret = bdrv_pread(bs->file, s->free_data_block_offset, >> + tdbatmap_header_buf, TBBATMAP_HEAD_SIZE); >> + if (ret < 0) { >> + goto fail; >> + } >> + >> + tdbatmap_header = (VHDTdBatmapHeader *) tdbatmap_header_buf; >> + if (!strncmp(tdbatmap_header->magic, "tdbatmap", 8)) { >> + s->free_data_block_offset = >> + be32_to_cpu(tdbatmap_header->batmap_size) * 512 >> + + be64_to_cpu(tdbatmap_header->batmap_offset); >> + } >> + >> + /* Read backing file location from dyn header table */ >> + if (dyndisk_header->parent_name[0] || >> dyndisk_header->parent_name[1]) { 
>> + for (i = 0; i < PARENT_LOCATOR_NUM; i++) { >> + data_offset = >> + >> be64_to_cpu(dyndisk_header->parent_locator[i].data_offset); >> + data_length = >> + >> be32_to_cpu(dyndisk_header->parent_locator[i].data_length); >> + platform = dyndisk_header->parent_locator[i].platform; >> + >> + if (platform == PLATFORM_MACX) { >> + if (data_offset + PARENT_PREFIX_LEN > >> + s->max_table_entries * s->block_size) { >> + goto fail; >> + } >> + if (data_length - PARENT_PREFIX_LEN > >> PARENT_MAX_LOCATOR) { >> + goto fail; >> + } >> + ret = bdrv_pread(bs->file, data_offset + >> PARENT_PREFIX_LEN, >> + bs->backing_file, data_length - >> PARENT_PREFIX_LEN); >> + if (ret < 0) { >> + goto fail; >> + } >> + >> + bs->backing_file[data_length - PARENT_PREFIX_LEN] = >> '\0'; >> + } >> + >> + if (data_offset > parent_locator_offset) { >> + parent_locator_offset = data_offset; >> + } >> + } >> + >> + if (strlen(bs->backing_file) == 0) { >> + error_setg(errp, "block-vpc: differencing is not support >> in" >> + "w2ru or w2ku"); >> + ret = -EINVAL; >> + goto fail; >> + } >> + } >> + >> + if (parent_locator_offset + 512 > s->free_data_block_offset) { >> + s->free_data_block_offset = parent_locator_offset + 512; >> + } >> + >> for (i = 0; i < s->max_table_entries; i++) { >> be32_to_cpus(&s->pagetable[i]); >> if (s->pagetable[i] != 0xFFFFFFFF) { >> @@ -340,35 +419,76 @@ static int vpc_reopen_prepare(BDRVReopenState >> *state, >> } >> >> /* >> - * Returns the absolute byte offset of the given sector in the image >> file. >> - * If the sector is not allocated, -1 is returned instead. >> + * Returns the absolute byte offset of the given sector in the >> differencing >> + * image file. >> * >> - * The parameter write must be 1 if the offset will be used for a write >> - * operation (the block bitmaps is updated then), 0 otherwise. >> + * If error happened, -1 is returned. >> + * >> + * When write all type or read dynamic, if the sector is not allocated, >> -2 >> + * is returned instead. 
If the sector is allocated in current file, the >> block >> + * offset is returned. >> + * >> + * When read diff. If the sector is not allocated, -2 is returned >> instead. >> + * If the sector is allocated in the backing file, -3 is returned. If the >> + * sector is allocated in current file, the block offset is returned. >> */ >> static inline int64_t get_sector_offset(BlockDriverState *bs, >> - int64_t sector_num, int write) >> + int64_t sector_num, bool write, bool diff) >> { >> BDRVVPCState *s = bs->opaque; >> - uint64_t offset = sector_num * 512; >> - uint64_t bitmap_offset, block_offset; >> + uint64_t offset = sector_num << BDRV_SECTOR_BITS; >> + uint64_t bitmap_offset; >> uint32_t pagetable_index, pageentry_index; >> + int64_t block_offset = LONG_MIN; >> + int ret; >> >> pagetable_index = offset / s->block_size; >> - pageentry_index = (offset % s->block_size) / 512; >> + pageentry_index = (offset % s->block_size) >> BDRV_SECTOR_BITS; >> >> - if (pagetable_index >= s->max_table_entries || >> s->pagetable[pagetable_index] == 0xffffffff) >> - return -1; // not allocated >> + if (pagetable_index >= s->max_table_entries) { >> + return -2; >> + } >> + if (s->pagetable[pagetable_index] == 0xffffffff) { >> + if (!write && diff) { >> + return -3; /* parent allocated */ >> + } else { >> + return -2; /* not allocated */ >> + } >> + } >> >> - bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index]; >> - block_offset = bitmap_offset + s->bitmap_size + (512 * >> pageentry_index); >> + bitmap_offset = (uint64_t) s->pagetable[pagetable_index] >> + << BDRV_SECTOR_BITS; >> + >> + if (!diff || write) { >> + block_offset = bitmap_offset + s->bitmap_size >> + + (pageentry_index << BDRV_SECTOR_BITS); >> + } else { >> + uint32_t bitmap_index, bitmapentry_index; >> + uint8_t bitmap[s->bitmap_size]; >> + >> + if (bitmap_offset > s->max_table_entries * s->block_size) { >> + return -1; >> + } >> + ret = bdrv_pread(bs->file, bitmap_offset, bitmap, >> s->bitmap_size); >> + if 
(ret < 0) { >> + return -1; >> + } >> >> + bitmap_index = pageentry_index / 8; >> + bitmapentry_index = 7 - pageentry_index % 8; >> + if (bitmap[bitmap_index] & 0x1 << bitmapentry_index) { >> + block_offset = bitmap_offset + s->bitmap_size >> + + (pageentry_index << BDRV_SECTOR_BITS); >> + } else { >> + return -3; >> + } >> + } >> // We must ensure that we don't write to any sectors which are >> marked as >> // unused in the bitmap. We get away with setting all bits in the >> block >> // bitmap each time we write to a new block. This might cause >> Virtual PC to >> // miss sparse read optimization, but it's not a problem in terms of >> // correctness. >> - if (write && (s->last_bitmap_offset != bitmap_offset)) { >> + if (!diff && write && (s->last_bitmap_offset != bitmap_offset)) { >> uint8_t bitmap[s->bitmap_size]; >> >> s->last_bitmap_offset = bitmap_offset; >> @@ -376,7 +496,7 @@ static inline int64_t >> get_sector_offset(BlockDriverState *bs, >> bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, >> s->bitmap_size); >> } >> >> -// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" >> PRIx64 ", bloff: %" PRIx64 "\n", >> +// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 >> ", bloff: %" PRIx64 "\n", >> // sector_num, pagetable_index, pageentry_index, >> // bitmap_offset, block_offset); >> >> @@ -437,7 +557,8 @@ static int rewrite_footer(BlockDriverState* bs) >> * >> * Returns the sectors' offset in the image file on success and < 0 on >> error >> */ >> -static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num) >> +static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num, >> + bool diff) >> { >> BDRVVPCState *s = bs->opaque; >> int64_t bat_offset; >> @@ -457,7 +578,11 @@ static int64_t alloc_block(BlockDriverState* bs, >> int64_t sector_num) >> s->pagetable[index] = s->free_data_block_offset / 512; >> >> // Initialize the block's bitmap >> - memset(bitmap, 0xff, s->bitmap_size); >> + if (diff) { >> + 
memset(bitmap, 0x0, s->bitmap_size); >> + } else { >> + memset(bitmap, 0xff, s->bitmap_size); >> + } >> ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap, >> s->bitmap_size); >> if (ret < 0) { >> @@ -477,7 +602,7 @@ static int64_t alloc_block(BlockDriverState* bs, >> int64_t sector_num) >> if (ret < 0) >> goto fail; >> >> - return get_sector_offset(bs, sector_num, 0); >> + return get_sector_offset(bs, sector_num, false, diff); >> >> fail: >> s->free_data_block_offset -= (s->block_size + s->bitmap_size); >> @@ -501,36 +626,66 @@ static int vpc_read(BlockDriverState *bs, int64_t >> sector_num, >> uint8_t *buf, int nb_sectors) >> { >> BDRVVPCState *s = bs->opaque; >> - int ret; >> - int64_t offset; >> - int64_t sectors, sectors_per_block; >> VHDFooter *footer = (VHDFooter *) s->footer_buf; >> + int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS; >> + int64_t offset, sectors; >> + int ret; >> >> - if (be32_to_cpu(footer->type) == VHD_FIXED) { >> + switch (be32_to_cpu(footer->type)) { >> + case VHD_FIXED: >> return bdrv_read(bs->file, sector_num, buf, nb_sectors); >> - } >> - while (nb_sectors > 0) { >> - offset = get_sector_offset(bs, sector_num, 0); >> - >> - sectors_per_block = s->block_size >> BDRV_SECTOR_BITS; >> - sectors = sectors_per_block - (sector_num % sectors_per_block); >> - if (sectors > nb_sectors) { >> - sectors = nb_sectors; >> - } >> + case VHD_DYNAMIC: >> + while (nb_sectors > 0) { >> + sectors = sectors_per_block - (sector_num % >> sectors_per_block); >> + if (sectors > nb_sectors) { >> + sectors = nb_sectors; >> + } >> >> - if (offset == -1) { >> - memset(buf, 0, sectors * BDRV_SECTOR_SIZE); >> - } else { >> - ret = bdrv_pread(bs->file, offset, buf, >> - sectors * BDRV_SECTOR_SIZE); >> - if (ret != sectors * BDRV_SECTOR_SIZE) { >> + offset = get_sector_offset(bs, sector_num, false, false); >> + if (offset == -1) { >> return -1; >> + } else if (offset == -2) { >> + memset(buf, 0, sectors * BDRV_SECTOR_SIZE); >> + } else { 
>> + ret = bdrv_pread(bs->file, offset, buf, >> + sectors * BDRV_SECTOR_SIZE); >> + if (ret != sectors * BDRV_SECTOR_SIZE) { >> + return -1; >> + } >> } >> + >> + nb_sectors -= sectors; >> + sector_num += sectors; >> + buf += sectors * BDRV_SECTOR_SIZE; >> } >> + break; >> + case VHD_DIFF: >> + while (nb_sectors > 0) { >> + offset = get_sector_offset(bs, sector_num, false, true); >> + if (offset == -1) { >> + return -1; >> + } else if (offset == -2) { >> + memset(buf, 0, BDRV_SECTOR_SIZE); >> + } else if (offset == -3) { >> + ret = bdrv_pread(bs->backing_hd, sector_num << >> BDRV_SECTOR_BITS >> + , buf, BDRV_SECTOR_SIZE); >> + if (ret < 0) { >> + return -1; >> + } >> + } else { >> + ret = bdrv_pread(bs->file, offset, buf, >> BDRV_SECTOR_SIZE); >> + if (ret != BDRV_SECTOR_SIZE) { >> + return -1; >> + } >> + } >> >> - nb_sectors -= sectors; >> - sector_num += sectors; >> - buf += sectors * BDRV_SECTOR_SIZE; >> + nb_sectors--; >> + sector_num++; >> + buf += BDRV_SECTOR_SIZE; >> + } >> + break; >> + default: >> + return -1; >> } >> return 0; >> } >> @@ -546,44 +701,101 @@ static coroutine_fn int >> vpc_co_read(BlockDriverState *bs, int64_t sector_num, >> return ret; >> } >> >> -static int vpc_write(BlockDriverState *bs, int64_t sector_num, >> - const uint8_t *buf, int nb_sectors) >> +static inline int64_t write_bitmap(BlockDriverState *bs, int64_t >> sector_num, >> + int64_t sectors) >> { >> BDRVVPCState *s = bs->opaque; >> - int64_t offset; >> - int64_t sectors, sectors_per_block; >> + uint64_t offset = sector_num << BDRV_SECTOR_BITS; >> + uint64_t bitmap_offset; >> + uint32_t pagetable_index, pageentry_index; >> + uint8_t bitmap[s->bitmap_size]; >> + uint32_t bitmap_index, bitmapbit_index; >> + int i; >> int ret; >> - VHDFooter *footer = (VHDFooter *) s->footer_buf; >> >> - if (be32_to_cpu(footer->type) == VHD_FIXED) { >> - return bdrv_write(bs->file, sector_num, buf, nb_sectors); >> + pagetable_index = offset / s->block_size; >> + pageentry_index = (offset % 
s->block_size) / 512; >> + bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index]; >> + >> + if (bitmap_offset > s->max_table_entries * s->block_size) { >> + return -1; >> + } >> + ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size); >> + if (ret < 0) { >> + return -1; >> } >> - while (nb_sectors > 0) { >> - offset = get_sector_offset(bs, sector_num, 1); >> >> - sectors_per_block = s->block_size >> BDRV_SECTOR_BITS; >> - sectors = sectors_per_block - (sector_num % sectors_per_block); >> - if (sectors > nb_sectors) { >> - sectors = nb_sectors; >> + for (i = 0; i < sectors; i++) { >> + bitmap_index = pageentry_index / 8; >> + bitmapbit_index = 7 - pageentry_index % 8; >> + bitmap[bitmap_index] |= (0x1 << bitmapbit_index); >> + pageentry_index++; >> + } >> + ret = bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size); >> + if (ret < 0) { >> + return -1; >> + } >> + >> + return 0; >> +} >> + >> +static int vpc_write(BlockDriverState *bs, int64_t sector_num, >> + const uint8_t *buf, int nb_sectors) >> +{ >> + BDRVVPCState *s = bs->opaque; >> + VHDFooter *footer = (VHDFooter *) s->footer_buf; >> + int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS; >> + int64_t offset, sectors; >> + bool diff = true; >> + int ret = 0; >> + >> + switch (be32_to_cpu(footer->type)) { >> + case VHD_FIXED: >> + return bdrv_write(bs->file, sector_num, buf, nb_sectors); >> + case VHD_DYNAMIC: >> + case VHD_DIFF: >> + if (be32_to_cpu(footer->type) == VHD_DYNAMIC) { >> + diff = false; >> } >> >> - if (offset == -1) { >> - offset = alloc_block(bs, sector_num); >> - if (offset < 0) >> + while (nb_sectors > 0) { >> + sectors = sectors_per_block - (sector_num % >> sectors_per_block); >> + if (sectors > nb_sectors) { >> + sectors = nb_sectors; >> + } >> + >> + offset = get_sector_offset(bs, sector_num, true, diff); >> + if (offset == -1) { >> return -1; >> - } >> + } else if (offset == -2) { >> + offset = alloc_block(bs, sector_num, diff); >> + if (offset < 0) { 
>> + return -1; >> + } >> + } >> >> - ret = bdrv_pwrite(bs->file, offset, buf, sectors * >> BDRV_SECTOR_SIZE); >> - if (ret != sectors * BDRV_SECTOR_SIZE) { >> - return -1; >> - } >> + ret = bdrv_pwrite(bs->file, offset, buf, >> + sectors * BDRV_SECTOR_SIZE); >> + if (ret != sectors * BDRV_SECTOR_SIZE) { >> + return -1; >> + } >> >> - nb_sectors -= sectors; >> - sector_num += sectors; >> - buf += sectors * BDRV_SECTOR_SIZE; >> - } >> + if (diff) { >> + ret = write_bitmap(bs, sector_num, sectors); >> + if (ret < 0) { >> + return -1; >> + } >> + } >> >> - return 0; >> + nb_sectors -= sectors; >> + sector_num += sectors; >> + buf += sectors * BDRV_SECTOR_SIZE; >> + } >> + break; >> + default: >> + return -1; >> + } >> + return ret; >> } >> >> static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t >> sector_num, >> @@ -910,6 +1122,7 @@ static BlockDriver bdrv_vpc = { >> .bdrv_close = vpc_close, >> .bdrv_reopen_prepare = vpc_reopen_prepare, >> .bdrv_create = vpc_create, >> + .supports_backing = true, >> >> .bdrv_read = vpc_co_read, >> .bdrv_write = vpc_co_write, >> diff --git a/include/block/block_int.h b/include/block/block_int.h >> index 8a61215..aab3ae8 100644 >> --- a/include/block/block_int.h >> +++ b/include/block/block_int.h >> @@ -335,8 +335,10 @@ struct BlockDriverState { >> QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; >> >> char filename[1024]; >> - char backing_file[1024]; /* if non zero, the image is a diff of >> - this file image */ >> + >> +#define PARENT_MAX_LOCATOR 512 >> + char backing_file[PARENT_MAX_LOCATOR]; /* if non zero, the image is a diff >> + of this file image */ >> char backing_format[16]; /* if non-zero and backing_file exists */ >> >> QDict *full_open_options; >> >> >