I re-sent the patch and use bdrv_co_flush now, thanks. 2011/10/27 shu ming <shum...@linux.vnet.ibm.com>: > Looks good to me. A nit, it seems that bdrv_flush is not supported anymore > in upstream. > bdrv_co_flush should be used instead if you update your workspace to latest > one. > On 2011-10-26 18:08, Robert Wang wrote: >> >> Please find version 4 in the attachment. >> >> 2011/10/23 shu ming<shum...@linux.vnet.ibm.com>: >>> >>> On 2011-10-13 0:23, Dong Xu Wang wrote: >>>> >>>> Add add-cow file format >>>> >>>> Signed-off-by: Dong Xu Wang<wdon...@linux.vnet.ibm.com> >>>> --- >>>> Makefile.objs | 1 + >>>> block.c | 2 +- >>>> block.h | 1 + >>>> block/add-cow.c | 412 >>>> ++++++++++++++++++++++++++++++++++++++++++++++++ >>>> block_int.h | 1 + >>>> docs/specs/add-cow.txt | 45 ++++++ >>>> 6 files changed, 461 insertions(+), 1 deletions(-) >>>> create mode 100644 block/add-cow.c >>>> create mode 100644 docs/specs/add-cow.txt >>>> >>>> diff --git a/Makefile.objs b/Makefile.objs >>>> index c849e51..624c04c 100644 >>>> --- a/Makefile.objs >>>> +++ b/Makefile.objs >>>> @@ -31,6 +31,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o >>>> >>>> block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o >>>> vpc.o vvfat.o >>>> block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o >>>> qcow2-snapshot.o qcow2-cache.o >>>> +block-nested-y += add-cow.o >>>> block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o >>>> qed-cluster.o >>>> block-nested-y += qed-check.o >>>> block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o >>>> diff --git a/block.c b/block.c >>>> index e865fab..c25241d 100644 >>>> --- a/block.c >>>> +++ b/block.c >>>> @@ -106,7 +106,7 @@ int is_windows_drive(const char *filename) >>>> #endif >>>> >>>> /* check if the path starts with "<protocol>:" */ >>>> -static int path_has_protocol(const char *path) >>>> +int path_has_protocol(const char *path) >>>> { >>>> #ifdef _WIN32 >>>> if (is_windows_drive(path) || >>>> diff --git a/block.h b/block.h >>>> index 16bfa0a..8b09f12 100644 >>>> --- a/block.h >>>> +++ b/block.h >>>> @@ -256,6 +256,7 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, >>>> QEMUSnapshotInfo *sn); >>>> >>>> char *get_human_readable_size(char *buf, int buf_size, int64_t size); >>>> int path_is_absolute(const char *path); >>>> +int path_has_protocol(const char *path); >>>> void path_combine(char *dest, int dest_size, >>>> const char *base_path, >>>> const char *filename); >>>> diff --git a/block/add-cow.c b/block/add-cow.c >>>> new file mode 100644 >>>> index 0000000..d2538a2 >>>> --- /dev/null >>>> +++ b/block/add-cow.c >>>> @@ -0,0 +1,412 @@ >>>> +#include "qemu-common.h" >>>> +#include "block_int.h" >>>> +#include "module.h" >>>> + >>>> +#define ADD_COW_MAGIC (((uint64_t)'A'<< 56) | ((uint64_t)'D'<< >>>> 48) | \ >>>> + ((uint64_t)'D'<< 40) | ((uint64_t)'_'<< >>>> 32) | \ >>>> + ((uint64_t)'C'<< 24) | ((uint64_t)'O'<< >>>> 16) | \ >>>> + ((uint64_t)'W'<< 8) | 0xFF) >>>> +#define ADD_COW_VERSION 1 >>>> + >>>> +typedef struct AddCowHeader { >>>> + uint64_t magic; >>>> + uint32_t version; >>>> + char backing_file[1024]; >>>> + char image_file[1024]; >>> >>> 1024 is a magic number for me. Can we have a meaningful macro? >>> >>> >>>> + uint64_t size; >>>> +} QEMU_PACKED AddCowHeader; >>>> + >>>> +typedef struct BDRVAddCowState { >>>> + char image_file[1024]; >>>> + BlockDriverState *image_hd; >>>> + uint8_t *bitmap; >>>> + uint64_t bitmap_size; >>>> +} BDRVAddCowState; >>>> + >>>> +static int add_cow_probe(const uint8_t *buf, int buf_size, const char >>>> *filename) >>>> +{ >>>> + const AddCowHeader *header = (const void *)buf; >>>> + >>>> + if (be64_to_cpu(header->magic) == ADD_COW_MAGIC&& >>>> + be32_to_cpu(header->version) == ADD_COW_VERSION) { >>>> + return 100; >>>> + } else { >>>> + return 0; >>>> + } >>>> +} >>>> + >>>> +static int add_cow_open(BlockDriverState *bs, int flags) >>>> +{ >>>> + AddCowHeader header; >>>> + int64_t size; >>>> + char image_filename[1024]; >>>> + int image_flags; >>>> + BlockDriver *image_drv = NULL; >>>> + int ret; >>>> + BDRVAddCowState *state = (BDRVAddCowState *)(bs->opaque); >>>> + >>>> + ret = bdrv_pread(bs->file, 0,&header, sizeof(header)); >>>> + if (ret != sizeof(header)) { >>>> + goto fail; >>>> + } >>>> + >>>> + if (be64_to_cpu(header.magic) != ADD_COW_MAGIC || >>>> + be32_to_cpu(header.version) != ADD_COW_VERSION) { >>>> + ret = -1; >>>> + goto fail; >>>> + } >>>> + >>>> + size = be64_to_cpu(header.size); >>>> + bs->total_sectors = size / BDRV_SECTOR_SIZE; >>>> + >>>> + QEMU_BUILD_BUG_ON(sizeof(state->image_file) != >>>> sizeof(header.image_file)); >>>> + pstrcpy(bs->backing_file, sizeof(bs->backing_file), >>>> + header.backing_file); >>>> + pstrcpy(state->image_file, sizeof(state->image_file), >>>> + header.image_file); >>>> + >>>> + state->bitmap_size = ((bs->total_sectors + 7)>> 3); >>>> + state->bitmap = g_malloc0(state->bitmap_size); >>>> + >>>> + ret = bdrv_pread(bs->file, sizeof(header), state->bitmap, >>>> + state->bitmap_size); >>>> + if (ret != state->bitmap_size) { >>>> + goto fail; >>>> + } >>>> + /* If there is a image_file, must be together with backing_file */ >>>> + if (state->image_file[0] != '\0') { >>>> + state->image_hd = bdrv_new(""); >>>> + /* Relative to image or working dir, need discussion */ >>>> + if (path_has_protocol(state->image_file)) { >>>> + pstrcpy(image_filename, sizeof(image_filename), >>>> + state->image_file); >>>> + } else { >>>> + path_combine(image_filename, sizeof(image_filename), >>>> + bs->filename, state->image_file); >>>> + } >>>> + >>>> + image_drv = bdrv_find_format("raw"); >>>> + image_flags = >>>> + (flags& (~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING))) | >>>> BDRV_O_RDWR; >>>> + state->image_hd->keep_read_only = 0; >>>> + >>>> + ret = bdrv_open(state->image_hd, image_filename, image_flags, >>>> + image_drv); >>>> + if (ret< 0) { >>>> + goto fail; >>>> + } >>>> + } else { >>>> + ret = -ENOENT; >>>> + goto fail; >>>> + } >>>> + return 0; >>>> + fail: >>>> + if (state->bitmap) { >>>> + g_free(state->bitmap); >>>> + state->bitmap = NULL; >>>> + } >>>> + return ret; >>>> +} >>>> + >>>> +static inline void add_cow_set_bit(BlockDriverState *bs, int64_t >>>> bitnum) >>>> +{ >>>> + uint64_t offset = bitnum / 8; >>>> + BDRVAddCowState *state = (BDRVAddCowState *)(bs->opaque); >>>> + state->bitmap[offset] |= (1<< (bitnum % 8)); >>>> +} >>>> + >>>> +static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum) >>>> +{ >>>> + BDRVAddCowState *state = (BDRVAddCowState *)(bs->opaque); >>>> + uint64_t offset = bitnum / 8; >>>> + return !!(state->bitmap[offset]& (1<< (bitnum % 8))); >>>> +} >>>> + >>>> +static int add_cow_is_allocated(BlockDriverState *bs, int64_t >>>> sector_num, >>>> + int nb_sectors, int *num_same) >>>> +{ >>>> + int changed; >>>> + uint64_t bitmap_size = ((BDRVAddCowState >>>> *)(bs->opaque))->bitmap_size; >>>> + >>>> + /* Beyond the end of bitmap, return error or read from >>>> backing_file? >>>> */ >>>> + if (((sector_num + nb_sectors + 7) / 8)> bitmap_size) { >>>> + return 0; >>>> + } >>>> + >>>> + if (nb_sectors == 0) { >>>> + *num_same = nb_sectors; >>>> + return 0; >>>> + } >>>> + >>>> + changed = is_bit_set(bs, sector_num); >>>> + if (changed< 0) { >>>> + return 0; >>>> + } >>>> + >>>> + for (*num_same = 1; *num_same< nb_sectors; (*num_same)++) { >>>> + if (is_bit_set(bs, sector_num + *num_same) != changed) { >>>> + break; >>>> + } >>>> + } >>>> + >>>> + return changed; >>>> +} >>>> + >>>> +static int add_cow_update_bitmap(BlockDriverState *bs, int64_t >>>> sector_num, >>>> + int nb_sectors) >>>> +{ >>>> + int i, ret = 0; >>>> + BDRVAddCowState *state = (BDRVAddCowState *)(bs->opaque); >>>> + uint64_t start_pos = sector_num / 8; >>>> + uint64_t end_pos = (sector_num + nb_sectors - 1) / 8; >>>> + >>>> + if (start_pos> state->bitmap_size) { >>>> + return -1; >>>> + } >>>> + >>>> + for (i = 0; i< nb_sectors; i++) { >>>> + add_cow_set_bit(bs, sector_num + i); >>>> + } >>>> + ret = bdrv_pwrite_sync(bs->file, >>>> + sizeof(AddCowHeader) + start_pos, >>>> + state->bitmap + start_pos, >>>> + MIN(((end_pos - start_pos)& (~512)) + 512, >>>> state->bitmap_size - >>>> start_pos)); >>>> + return ret; >>>> +} >>>> + >>>> +static void add_cow_close(BlockDriverState *bs) >>>> +{ >>>> + BDRVAddCowState *state = (BDRVAddCowState *)(bs->opaque); >>>> + if (state->bitmap) { >>>> + g_free(state->bitmap); >>>> + state->bitmap = NULL; >>>> + } >>>> +} >>>> + >>>> +static int add_cow_create(const char *filename, QEMUOptionParameter >>>> *options) >>>> +{ >>>> + AddCowHeader header; >>>> + int64_t image_sectors = 0; >>>> + const char *backing_filename = NULL; >>>> + const char *image_filename = NULL; >>>> + int ret; >>>> + BlockDriverState *bs, *image_bs = NULL; >>>> + >>>> + while (options&& options->name) { >>>> + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { >>>> + image_sectors = options->value.n / BDRV_SECTOR_SIZE; >>>> + } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { >>>> + backing_filename = options->value.s; >>>> + } else if (!strcmp(options->name, BLOCK_OPT_IMAGE_FILE)) { >>>> + image_filename = options->value.s; >>>> + } >>>> + options++; >>>> + } >>>> + >>>> + if (!backing_filename || !image_filename) { >>>> + return -EINVAL; >>>> + } >>>> + /* Make sure image file exists */ >>>> + ret = bdrv_file_open(&image_bs, image_filename, BDRV_O_RDWR >>>> + | BDRV_O_CACHE_WB); >>>> + if (ret< 0) { >>>> + return ret; >>>> + } >>>> + bdrv_close(image_bs); >>>> + ret = bdrv_create_file(filename, NULL); >>>> + if (ret< 0) { >>>> + return ret; >>>> + } >>>> + >>>> + ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR); >>>> + if (ret< 0) { >>>> + return ret; >>>> + } >>>> + >>>> + memset(&header, 0, sizeof(header)); >>>> + header.magic = cpu_to_be64(ADD_COW_MAGIC); >>>> + header.version = cpu_to_be32(ADD_COW_VERSION); >>>> + pstrcpy(header.backing_file, sizeof(header.backing_file), >>>> backing_filename); >>>> + pstrcpy(header.image_file, sizeof(header.image_file), >>>> image_filename); >>>> + header.size = cpu_to_be64(image_sectors * BDRV_SECTOR_SIZE); >>>> + >>>> + ret = bdrv_pwrite(bs, 0,&header, sizeof(header)); >>>> + if (ret< 0) { >>>> + bdrv_close(bs); >>>> + return ret; >>>> + } >>>> + bdrv_close(bs); >>>> + >>>> + BlockDriver *drv = bdrv_find_format("add-cow"); >>>> + assert(drv != NULL); >>>> + ret = bdrv_open(bs, filename, BDRV_O_RDWR | BDRV_O_NO_FLUSH, drv); >>>> + if (ret< 0) { >>>> + return ret; >>>> + } >>>> + >>>> + ret = bdrv_truncate(bs, image_sectors * BDRV_SECTOR_SIZE); >>>> +fail: >>> >>> "fail" label is not used in this function. >>> >>> >>>> + bdrv_close(bs); >>>> + return ret; >>>> +} >>>> + >>>> +static int add_cow_co_readv(BlockDriverState *bs, int64_t sector_num, >>>> + int remaining_sectors, QEMUIOVector *qiov) >>>> +{ >>>> + BDRVAddCowState *s = bs->opaque; >>>> + int cur_nr_sectors; >>>> + uint64_t bytes_done = 0; >>>> + QEMUIOVector hd_qiov; >>>> + int n, ret = 0; >>>> + >>>> + qemu_iovec_init(&hd_qiov, qiov->niov); >>>> + while (remaining_sectors != 0) { >>>> + cur_nr_sectors = remaining_sectors; >>>> + if (add_cow_is_allocated(bs, sector_num, cur_nr_sectors,&n)) { >>>> + cur_nr_sectors = n; >>>> + qemu_iovec_reset(&hd_qiov); >>>> + qemu_iovec_copy(&hd_qiov, qiov, bytes_done, >>>> + cur_nr_sectors * BDRV_SECTOR_SIZE); >>>> + ret = bdrv_co_readv(s->image_hd, sector_num, n,&hd_qiov); >>>> + if (ret< 0) { >>>> + goto fail; >>>> + } >>>> + } else { >>>> + cur_nr_sectors = n; >>>> + if (bs->backing_hd) { >>>> + qemu_iovec_reset(&hd_qiov); >>>> + qemu_iovec_copy(&hd_qiov, qiov, bytes_done, >>>> + cur_nr_sectors * BDRV_SECTOR_SIZE); >>>> + ret = bdrv_co_readv(bs->backing_hd, sector_num, >>>> + n,&hd_qiov); >>>> + if (ret< 0) { >>>> + goto fail; >>>> + } >>>> + } else { >>>> + qemu_iovec_reset(&hd_qiov); >>>> + qemu_iovec_memset(&hd_qiov, 0, >>>> + BDRV_SECTOR_SIZE * cur_nr_sectors); >>>> + } >>>> + } >>>> + remaining_sectors -= cur_nr_sectors; >>>> + sector_num += cur_nr_sectors; >>>> + bytes_done += cur_nr_sectors * BDRV_SECTOR_SIZE; >>>> + } >>>> +fail: >>>> + qemu_iovec_destroy(&hd_qiov); >>>> + return ret; >>>> +} >>>> + >>>> +static int add_cow_co_writev(BlockDriverState *bs, int64_t sector_num, >>>> + int remaining_sectors, QEMUIOVector *qiov) >>>> +{ >>>> + BDRVAddCowState *s = bs->opaque; >>>> + int ret = 0; >>>> + QEMUIOVector hd_qiov; >>>> + qemu_iovec_init(&hd_qiov, qiov->niov); >>>> + >>> >>> It looks like that the memory where "bs" is pointing to should be >>> protected >>> by a lock. Because "bs" >>> was passed to several BlockDriver entries like ".bdrv_co_writev', >>> 'bdrv_co_readv'&etc which can be executed >>> in parallel to access the memory where "bs" is pointing to. >>> >>>> + qemu_iovec_reset(&hd_qiov); >>>> + qemu_iovec_copy(&hd_qiov, qiov, 0, remaining_sectors * >>>> BDRV_SECTOR_SIZE); >>>> + ret = bdrv_co_writev(s->image_hd, >>>> + sector_num, >>>> + remaining_sectors,&hd_qiov); >>>> + if (ret< 0) { >>>> + goto fail; >>>> + } >>>> + >>>> + ret = add_cow_update_bitmap(bs, sector_num, remaining_sectors); >>>> + if (ret< 0) { >>>> + goto fail; >>>> + } >>>> +fail: >>>> + qemu_iovec_destroy(&hd_qiov); >>>> + return ret; >>>> +} >>>> + >>>> +static int bdrv_add_cow_truncate(BlockDriverState *bs, int64_t offset) >>>> +{ >>>> + int ret = 0; >>>> + int64_t image_sectors = offset/BDRV_SECTOR_SIZE; >>>> + int64_t be_offset = cpu_to_be64(offset); >>>> + BDRVAddCowState *state = bs->opaque; >>>> + ret = bdrv_truncate(bs->file, ((image_sectors + 7)>> 3) >>>> + + sizeof(AddCowHeader)); >>>> + if (ret< 0) { >>>> + return ret; >>>> + } >>>> + >>>> + ret = bdrv_pwrite_sync(bs->file, offsetof(AddCowHeader, size), >>>> +&be_offset, sizeof(uint64_t)); >>>> + if (ret< 0) { >>>> + return ret; >>>> + } >>>> + >>>> + ret = bdrv_truncate(state->image_hd, offset); >>>> + return ret; >>>> +} >>>> + >>>> +static QEMUOptionParameter add_cow_create_options[] = { >>>> + { >>>> + .name = BLOCK_OPT_SIZE, >>>> + .type = OPT_SIZE, >>>> + .help = "Virtual disk size" >>>> + }, >>>> + { >>>> + .name = BLOCK_OPT_BACKING_FILE, >>>> + .type = OPT_STRING, >>>> + .help = "File name of a base image" >>>> + }, >>>> + { >>>> + .name = BLOCK_OPT_IMAGE_FILE, >>>> + .type = OPT_STRING, >>>> + .help = "File name of a image file" >>>> + }, >>>> + { NULL } >>>> +}; >>>> + >>>> +static int add_cow_flush(BlockDriverState *bs) >>>> +{ >>>> + BDRVAddCowState *state = bs->opaque; >>>> + int ret = bdrv_flush(state->image_hd); >>>> + if (ret< 0) { >>>> + return ret; >>>> + } >>>> + return bdrv_flush(bs->file); >>>> +} >>>> + >>>> +static BlockDriverAIOCB *add_cow_aio_flush(BlockDriverState *bs, >>>> + BlockDriverCompletionFunc *cb, void *opaque) >>>> +{ >>>> + BDRVAddCowState *state = bs->opaque; >>>> + int ret = bdrv_flush(state->image_hd); >>>> + if (ret< 0) { >>>> + return NULL; >>>> + } >>>> + >>>> + return bdrv_aio_flush(bs->file, cb, opaque); >>>> +} >>>> + >>>> +static BlockDriver bdrv_add_cow = { >>>> + .format_name = "add-cow", >>>> + .instance_size = sizeof(BDRVAddCowState), >>>> + .bdrv_probe = add_cow_probe, >>>> + .bdrv_open = add_cow_open, >>>> + .bdrv_close = add_cow_close, >>>> + .bdrv_create = add_cow_create, >>>> + .bdrv_is_allocated = add_cow_is_allocated, >>>> + >>>> + .bdrv_co_readv = add_cow_co_readv, >>>> + .bdrv_co_writev = add_cow_co_writev, >>>> + .bdrv_truncate = bdrv_add_cow_truncate, >>>> + >>>> + .create_options = add_cow_create_options, >>>> + .bdrv_flush = add_cow_flush, >>>> + .bdrv_aio_flush = add_cow_aio_flush, >>>> +}; >>>> + >>>> +static void bdrv_add_cow_init(void) >>>> +{ >>>> + bdrv_register(&bdrv_add_cow); >>>> +} >>>> + >>>> +block_init(bdrv_add_cow_init); >>>> diff --git a/block_int.h b/block_int.h >>>> index 8c3b863..2b26fdc 100644 >>>> --- a/block_int.h >>>> +++ b/block_int.h >>>> @@ -42,6 +42,7 @@ >>>> #define BLOCK_OPT_TABLE_SIZE "table_size" >>>> #define BLOCK_OPT_PREALLOC "preallocation" >>>> #define BLOCK_OPT_SUBFMT "subformat" >>>> +#define BLOCK_OPT_IMAGE_FILE "image_file" >>>> >>>> typedef struct AIOPool { >>>> void (*cancel)(BlockDriverAIOCB *acb); >>>> diff --git a/docs/specs/add-cow.txt b/docs/specs/add-cow.txt >>>> new file mode 100644 >>>> index 0000000..0443c6c >>>> --- /dev/null >>>> +++ b/docs/specs/add-cow.txt >>>> @@ -0,0 +1,45 @@ >>>> +== General == >>>> + >>>> +Raw file format does not support backing_file and copy on write >>>> feature. >>>> Then >>>> +you can use add-cow file realize these features. >>>> + >>>> +When using add-cow, procedures may like this: >>>> +(ubuntu.img is a disk image which has been installed OS.) >>>> + 1) Create a raw image with the same size of ubuntu.img >>>> + qemu-img create -f raw test.raw 8G >>>> + 2) Create a add-cow image which will store dirty bitmap >>>> + qemu-img create -f add-cow test.add-cow -o >>>> backing_file=ubuntu.img,image_file=test.raw >>>> + 3) Run qemu with add-cow image >>>> + qemu -drive if=virtio,file=test.add-cow >>>> + >>>> +=Specification= >>>> + >>>> +The file format looks like this: >>>> + >>>> + +----------+----------+----------+-----+ >>>> + | Header | Data | Data | ... | >>>> + +----------+----------+----------+-----+ >>>> + >>>> + All numbers in add-cow are stored in Big Endian byte order. >>>> + >>>> + >>>> +== Header == >>>> + >>>> +The Header is included in the first bytes: >>>> + >>>> + Byte 0 - 7: magic >>>> + add-cow magic string ("ADD_COW\xff") >>>> + >>>> + 8 - 11: version >>>> + Version number (only valid value is 1 now) >>>> + >>>> + 12 - 1035: backing_file >>>> + backing_file file name related to add-cow file. >>>> While >>>> + using backing_file, must together with >>>> image_file. >>>> + >>>> + 1036 - 2059: image_file >>>> + image_file is a raw file, While using >>>> image_file, >>>> must >>>> + together with image_file. >>>> + >>>> + 2060 - 2067: size >>>> + Virtual disk size of image_file in bytes. >>> >>> >>> >> >> > > >
-- Regards Robert Wang