This patch is part of the Fast Virtual Disk (FVD) proposal. See http://wiki.qemu.org/Features/FVD.
This patch adds the skeleton of the block device driver for Fast Virtual Disk (FVD). Signed-off-by: Chunqiang Tang <ct...@us.ibm.com> --- Makefile.objs | 2 +- block/fvd-create.c | 21 +++++++ block/fvd-flush.c | 24 +++++++ block/fvd-misc.c | 37 +++++++++++ block/fvd-open.c | 17 +++++ block/fvd-read.c | 21 +++++++ block/fvd-update.c | 21 +++++++ block/fvd-write.c | 21 +++++++ block/fvd.c | 60 ++++++++++++++++++ block/fvd.h | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 394 insertions(+), 1 deletions(-) create mode 100644 block/fvd-create.c create mode 100644 block/fvd-flush.c create mode 100644 block/fvd-misc.c create mode 100644 block/fvd-open.c create mode 100644 block/fvd-read.c create mode 100644 block/fvd-update.c create mode 100644 block/fvd-write.c create mode 100644 block/fvd.c create mode 100644 block/fvd.h diff --git a/Makefile.objs b/Makefile.objs index 264aab3..9185d3e 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -23,7 +23,7 @@ block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o -block-nested-y += blksim.o +block-nested-y += blksim.o fvd.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o block-nested-$(CONFIG_CURL) += curl.o diff --git a/block/fvd-create.c b/block/fvd-create.c new file mode 100644 index 0000000..5593cea --- /dev/null +++ b/block/fvd-create.c @@ -0,0 +1,21 @@ +/* + * QEMU Fast Virtual Disk Format bdrv_create() + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Chunqiang Tang <ct...@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ * + */ + +static int fvd_create(const char *filename, QEMUOptionParameter * options) +{ + return -ENOTSUP; +} + +static QEMUOptionParameter fvd_create_options[] = { + {NULL} +}; diff --git a/block/fvd-flush.c b/block/fvd-flush.c new file mode 100644 index 0000000..34bd5cb --- /dev/null +++ b/block/fvd-flush.c @@ -0,0 +1,24 @@ +/* + * QEMU Fast Virtual Disk Format bdrv_flush() and bdrv_aio_flush() + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Chunqiang Tang <ct...@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +static BlockDriverAIOCB *fvd_aio_flush(BlockDriverState * bs, + BlockDriverCompletionFunc * cb, + void *opaque) +{ + return NULL; +} + +static int fvd_flush(BlockDriverState * bs) +{ + return -ENOTSUP; +} diff --git a/block/fvd-misc.c b/block/fvd-misc.c new file mode 100644 index 0000000..f4e1038 --- /dev/null +++ b/block/fvd-misc.c @@ -0,0 +1,37 @@ +/* + * QEMU Fast Virtual Disk Format Misc Functions of BlockDriver Interface + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Chunqiang Tang <ct...@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +static void fvd_close(BlockDriverState * bs) +{ +} + +static int fvd_probe(const uint8_t * buf, int buf_size, const char *filename) +{ + return 0; +} + +static int fvd_is_allocated(BlockDriverState * bs, int64_t sector_num, + int nb_sectors, int *pnum) +{ + return 0; +} + +static int fvd_get_info(BlockDriverState * bs, BlockDriverInfo * bdi) +{ + return -ENOTSUP; +} + +static int fvd_has_zero_init(BlockDriverState * bs) +{ + return 0; +} diff --git a/block/fvd-open.c b/block/fvd-open.c new file mode 100644 index 0000000..056b994 --- /dev/null +++ b/block/fvd-open.c @@ -0,0 +1,17 @@ +/* + * QEMU Fast Virtual Disk Format bdrv_file_open() + * + * Copyright IBM, Corp. 
2010 + * + * Authors: + * Chunqiang Tang <ct...@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +static int fvd_open(BlockDriverState * bs, const char *filename, int flags) +{ + return -ENOTSUP; +} diff --git a/block/fvd-read.c b/block/fvd-read.c new file mode 100644 index 0000000..b9f3ac9 --- /dev/null +++ b/block/fvd-read.c @@ -0,0 +1,21 @@ +/* + * QEMU Fast Virtual Disk Format bdrv_aio_readv() + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Chunqiang Tang <ct...@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +static BlockDriverAIOCB *fvd_aio_readv(BlockDriverState * bs, + int64_t sector_num, QEMUIOVector * qiov, + int nb_sectors, + BlockDriverCompletionFunc * cb, + void *opaque) +{ + return NULL; +} diff --git a/block/fvd-update.c b/block/fvd-update.c new file mode 100644 index 0000000..2498618 --- /dev/null +++ b/block/fvd-update.c @@ -0,0 +1,21 @@ +/* + * QEMU Fast Virtual Disk Format bdrv_update + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Chunqiang Tang <ct...@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +static int fvd_update(BlockDriverState * bs, QEMUOptionParameter * options) +{ + return -ENOTSUP; +} + +static QEMUOptionParameter fvd_update_options[] = { + {NULL} +}; diff --git a/block/fvd-write.c b/block/fvd-write.c new file mode 100644 index 0000000..a736a37 --- /dev/null +++ b/block/fvd-write.c @@ -0,0 +1,21 @@ +/* + * QEMU Fast Virtual Disk Format bdrv_aio_writev() + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Chunqiang Tang <ct...@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ * + */ + +static BlockDriverAIOCB *fvd_aio_writev(BlockDriverState * bs, + int64_t sector_num, + QEMUIOVector * qiov, int nb_sectors, + BlockDriverCompletionFunc * cb, + void *opaque) +{ + return NULL; +} diff --git a/block/fvd.c b/block/fvd.c new file mode 100644 index 0000000..bc2645c --- /dev/null +++ b/block/fvd.c @@ -0,0 +1,60 @@ +/* + * QEMU Fast Virtual Disk Format + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Chunqiang Tang <ct...@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +/*============================================================================= + * See the following companion papers for a detailed description of FVD: + * 1. The so-called "FVD-cow paper": + * "FVD: a High-Performance Virtual Machine Image Format for Cloud", + * by Chunqiang Tang, 2010. + * 2. The so-called "FVD-compact paper": + * "FVD: a High-Performance Virtual Machine Image Format for Cloud + * with Sparse Image Capability", by Chunqiang Tang, 2010. + *============================================================================*/ + +#include "block/fvd.h" + +/* Use include to avoid exposing too many FVD symbols, and to allow inline + * function optimization. 
*/ +#include "block/fvd-flush.c" +#include "block/fvd-update.c" +#include "block/fvd-misc.c" +#include "block/fvd-create.c" +#include "block/fvd-open.c" +#include "block/fvd-read.c" +#include "block/fvd-write.c" + +static BlockDriver bdrv_fvd = { + .format_name = "fvd", + .instance_size = sizeof(BDRVFvdState), + .bdrv_create = fvd_create, + .bdrv_probe = fvd_probe, + .bdrv_file_open = fvd_open, + .bdrv_close = fvd_close, + .bdrv_is_allocated = fvd_is_allocated, + .bdrv_flush = fvd_flush, + .bdrv_aio_readv = fvd_aio_readv, + .bdrv_aio_writev = fvd_aio_writev, + .bdrv_aio_flush = fvd_aio_flush, + .create_options = fvd_create_options, + .update_options = fvd_update_options, + .bdrv_get_info = fvd_get_info, + .bdrv_update = fvd_update, + .bdrv_has_zero_init = fvd_has_zero_init +}; + +static void bdrv_fvd_init(void) +{ + bdrv_register(&bdrv_fvd); +} + +block_init(bdrv_fvd_init); diff --git a/block/fvd.h b/block/fvd.h new file mode 100644 index 0000000..f2da330 --- /dev/null +++ b/block/fvd.h @@ -0,0 +1,171 @@ +/* + * QEMU Fast Virtual Disk Format + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Chunqiang Tang <ct...@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "block_int.h" +#include "osdep.h" +#include "qemu-option.h" +#include "qemu-timer.h" +#include "block.h" +#include "qemu-queue.h" +#include "qemu-common.h" + +enum { + FVD_MAGIC = ('F' | 'V' << 8 | 'D' << 16 | '\0' << 24), + FVD_VERSION = 1, + INCOMPATIBLE_FEATURES_SPACE = 4096, /* in bytes. */ + DEF_PAGE_SIZE = 4096 /* in bytes. */ +}; + +/* + * The FVD format consists of the following fields in little endian: + * + Header fields of FvdHeader. + * + Bitmap, starting on a 4KB page boundary at a location specified by + * FvdHeader.bitmap_offset. + * + Journal, starting on a 4KB page boundary at a location specified by + * FvdHeader.journal_offset. 
+ * + Table, starting on a 4KB page boundary at a location specified by + * FvdHeader.table_offset. When expanding the size of an existing FVD + * image, the table can be expanded to borrow space from the next, + * "virtual disk data" section, by relocating some data chunks. + * + Virtual disk data, starting on a 4KB page boundary. Optionally, disk + * data can be stored in a separate data file specified by + * FvdHeader.data_file. + */ +typedef struct __attribute__ ((__packed__)) FvdHeader { + uint32_t magic; /* FVD_MAGIC */ + + /* Size of FvdHeader in bytes, rounded up to DEF_PAGE_SIZE. A new FVD + * version may add fields to FvdHeader and hence need to increase + * header_size. When an old FVD version reads an image created by a new + * FVD version, the old version only reads the beginning part of FvdHeader + * that it can understand and ignores the new fields at the end of + * FvdHeader. */ + uint32_t header_size; + + /* Version of the FVD software that created the image. */ + uint32_t create_version; + + /* Version of the FVD software that opened the image most recently. This + * field is for forward compatibility. Consider one example. Suppose FVD + * version N+1 introduces a compatible feature, e.g., adding a + * 'last_modified' timestamp into the FVD image header. Even if FVD + * version N is unaware of this new feature, it can still open an image + * created by FVD version N+1 without problem, but won't update the + * last_modified field. FVD version N sets the image's + * 'last_open_version=N' when it opens the image. When FVD version N+1 + * opens this image, it knows that the 'last_modified' field cannot be + * trusted and may take some actions accordingly, e.g., being conservative + * in some optimization heuristics that depend on the value of + * 'last_modified' to avoid making the optimization counterproductive. */ + uint32_t last_open_version; + + uint64_t virtual_disk_size; /* in bytes. Disk size perceived by the VM. 
*/ + uint64_t data_offset; /* in bytes. Aligned on DEF_PAGE_SIZE. */ + + /* Data can be optionally stored in a different file. */ + char data_file[1024]; + char data_file_fmt[16]; + + /* Base image. */ + char base_img[1024]; + char base_img_fmt[16]; + uint64_t base_img_size; /* in bytes. */ + + /* Bitmap for base image. */ + uint64_t bitmap_offset; /* in bytes. Aligned on DEF_PAGE_SIZE. */ + uint64_t bitmap_size; /* in bytes. Rounded up to DEF_PAGE_SIZE. */ + uint64_t block_size; /* in bytes. */ + + /* Journal */ + uint64_t journal_offset; /* in bytes. */ + uint64_t journal_size; /* in bytes. On-disk journal size. */ + uint32_t clean_shutdown; /* true if VM's last shutdown was graceful. */ + uint64_t stable_journal_epoch; /* Needed only if a chunk can be relocated.*/ + uint64_t journal_buf_size; /* in bytes. In-memory buffer size. */ + uint64_t journal_clean_buf_period; /* in milliseconds. */ + + /* Table for compact image. */ + uint64_t table_offset; /* in bytes. Aligned on DEF_PAGE_SIZE. */ + uint64_t table_size; /* in bytes. Rounded up to DEF_PAGE_SIZE. */ + uint64_t chunk_size; /* in bytes. */ + uint64_t storage_grow_unit; /* in bytes. */ + char add_storage_cmd[1024]; + uint32_t chunks_relocated; /* Affect bdrv_has_zero_init(). */ + + /* Copy-on-read */ + uint32_t copy_on_read; /* true or false */ + uint64_t max_outstanding_copy_on_read_data; /* in bytes. */ + + /* Prefetching. */ + int64_t prefetch_start_delay; /* in seconds. -1 means disabled. */ + uint32_t base_img_fully_prefetched; /* true or false. */ + uint32_t num_prefetch_slots; /* Max number of outstanding prefetch writes. */ + uint64_t bytes_per_prefetch; /* For whole image prefetching. */ + uint64_t prefetch_read_throughput_measure_time; /* in milliseconds. */ + uint64_t prefetch_write_throughput_measure_time; /* in milliseconds. */ + uint64_t prefetch_min_read_throughput; /* in KB/second. */ + uint64_t prefetch_min_write_throughput; /* in KB/second. 
*/ + uint64_t prefetch_max_read_throughput; /* in KB/second. */ + uint64_t prefetch_max_write_throughput; /* in KB/second. */ + uint64_t prefetch_throttle_time; /* in milliseconds. */ + + /* need_zero_init is true if the image mandates that the storage layer + * (BDRVFvdState.fvd_data) must return true for bdrv_has_zero_init(). + * This is the case if the optimization described in Section 3.3.3 of the + * FVD-cow paper is enabled (see function search_holes()). If 'qemu-img + * create' sets need_zero_init to true, 'qemu-img update' can be used to + * manually reset it to false, if the user always manually pre-fills the + * storage (e.g., a raw partition) with zeros. If the image is stored on a + * file system, it already supports zero_init, and hence there is no need + * to manually manipulate this field. */ + uint32_t need_zero_init; + + /* This field enables adding incompatible features. For example, suppose + * FVD version N+1 adds image compression. A compressed image cannot be + * opened by FVD version N. Suppose in FVD version N, the value of + * INCOMPATIBLE_FEATURES_SPACE is 4096. Introducing image compression + * in FVD version N+1 causes the following changes to the header. + * In FVD version N: + * uint8_t incompatible_features[4096]; + * In FVD version N+1: + * uint8_t image_compressed; + * uint8_t incompatible_features[4095]; + * + * When any FVD version X opens an image, it always scans through the + * entire array of 'incompatible_features', although the size of + * INCOMPATIBLE_FEATURES_SPACE may be different for different FVD + * versions. If any bit of 'incompatible_features' is non-zero, FVD + * version X refuses to open the image. In the example above, if FVD + * version N+1 creates a non-compressed image, it sets + * 'image_compressed=0', which then still allows FVD version N to open the + * image. 
Instead of using one byte to represent a new feature, it can + * also use one bit to represent a new feature, which then allows a total + * of 32768 incompatible features to be added in the future. + */ + uint8_t incompatible_features[INCOMPATIBLE_FEATURES_SPACE]; + + /* When a new FVD version introduces a new feature (which may or may not + * be backward compatible), an arbitrary number of new fields can be added + * to the image header, but those new fields must be added at the end of + * 'FvdHeader'. Old FVD versions simply won't read or write those new + * fields. Old FVD versions can still correctly access the bitmap, the + * journal, and the table, because no FVD version assumes a fixed header + * size, but instead accesses the bitmap, the journal, and the table + * through bitmap_offset, journal_offset, and table_offset, respectively. + * Similarly, if a new data structure of a variable size is added to the + * image header in the future, it must also be indexed by an offset field + * and a size field. */ +} FvdHeader; + +typedef struct BDRVFvdState { +} BDRVFvdState; -- 1.7.0.4