On Tue, Jul 30, 2013 at 11:57:20AM +0800, Fam Zheng wrote: > On Wed, 07/24 13:54, Jeff Cody wrote: > > This adds support for writing to the VHDX log. > > > > For spec details, see VHDX Specification Format v1.00: > > https://www.microsoft.com/en-us/download/details.aspx?id=34750 > > > > There are a few limitations to this log support: > > 1.) There is no caching yet > > 2.) The log is flushed after each entry > > > > The primary write interface, vhdx_log_write_and_flush(), performs a log > > write followed by an immediate flush of the log. > > > > As each log entry sector is a minimum of 4KB, partial sector writes are > > filled in with data from the disk write destination. > > > > If the current file log GUID is 0, a new GUID is generated and updated > > in the header. > > > > Signed-off-by: Jeff Cody <jc...@redhat.com> > > --- > > block/vhdx-log.c | 273 > > +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > > block/vhdx.h | 3 + > > 2 files changed, 276 insertions(+) > > > > diff --git a/block/vhdx-log.c b/block/vhdx-log.c > > index 89b9000..786b393 100644 > > --- a/block/vhdx-log.c > > +++ b/block/vhdx-log.c > > @@ -170,6 +170,53 @@ exit: > > return ret; > > } > > > > +/* Writes num_sectors to the log (all log sectors are 4096 bytes), > > + * from buffer 'buffer'. Upon return, *sectors_written will contain > > + * the number of sectors successfully written. > > + * > > + * It is assumed that 'buffer' is at least 4096*num_sectors large. 
> > + * > > + * 0 is returned on success, -errno otherwise */ > > +static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries > > *log, > > + uint32_t *sectors_written, void *buffer, > > + uint32_t num_sectors) > > +{ > > + int ret = 0; > > + uint64_t offset; > > + uint32_t write; > > + void *buffer_tmp; > > + BDRVVHDXState *s = bs->opaque; > > + > > + vhdx_user_visible_write(bs, s); > > + > > + write = log->write; > > + > > + buffer_tmp = buffer; > > + while (num_sectors) { > > + > > + offset = log->offset + write; > > + write = vhdx_log_inc_idx(write, log->length); > > + if (write == log->read) { > > + /* full */ > > + break; > > + } > > + ret = bdrv_pwrite_sync(bs->file, offset, buffer_tmp, > > + VHDX_LOG_SECTOR_SIZE); > > + if (ret < 0) { > > + goto exit; > > + } > > + buffer_tmp += VHDX_LOG_SECTOR_SIZE; > > + > > + log->write = write; > > + *sectors_written = *sectors_written + 1; > > + num_sectors--; > > + } > > + > > +exit: > > + return ret; > > +} > > + > > + > > /* Validates a log entry header */ > > static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader > > *hdr, > > BDRVVHDXState *s) > > @@ -732,3 +779,229 @@ exit: > > return ret; > > } > > > > + > > + > > +static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc, > > + VHDXLogDataSector *sector, void > > *data, > > + uint64_t seq) > > +{ > > + memcpy(&desc->leading_bytes, data, 8); > > + data += 8; > > + cpu_to_le64s(&desc->leading_bytes); > > + memcpy(sector->data, data, 4084); > > + data += 4084; > > + memcpy(&desc->trailing_bytes, data, 4); > > + cpu_to_le32s(&desc->trailing_bytes); > > + data += 4; > > + > > + sector->sequence_high = (uint32_t) (seq >> 32); > > + sector->sequence_low = (uint32_t) (seq & 0xffffffff); > > + sector->data_signature = VHDX_LOG_DATA_SIGNATURE; > > + > > + vhdx_log_desc_le_export(desc); > > + vhdx_log_data_le_export(sector); > > +} > > + > > + > > +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, > > + void *data, 
uint32_t length, uint64_t offset) > > +{ > > + int ret = 0; > > + void *buffer = NULL; > > + void *merged_sector = NULL; > > + void *data_tmp, *sector_write; > > + unsigned int i; > > + int sector_offset; > > + uint32_t desc_sectors, sectors, total_length; > > + uint32_t sectors_written = 0; > > + uint32_t aligned_length; > > + uint32_t leading_length = 0; > > + uint32_t trailing_length = 0; > > + uint32_t partial_sectors = 0; > > + uint32_t bytes_written = 0; > > + uint64_t file_offset; > > + VHDXHeader *header; > > + VHDXLogEntryHeader new_hdr; > > + VHDXLogDescriptor *new_desc = NULL; > > + VHDXLogDataSector *data_sector = NULL; > > + MSGUID new_guid = { 0 }; > > + > > + header = s->headers[s->curr_header]; > > + > > + /* need to have offset read data, and be on 4096 byte boundary */ > > + > > + if (length > header->log_length) { > > + /* no log present. we could create a log here instead of failing > > */ > > Does newly created vhdx have allocated log sectors? >
I don't know of any way to make Hyper-V create a file without an allocated log area (I believe with the files I've generated, it allocates a 1MB log between the header and the BAT region). The spec says that "LogLength" in the header should be a multiple of 1MB. And technically, 0 is a multiple of every number, so when parsing the header I don't fail out on a zero-length log. In practice, I don't think Hyper-V creates files with zero-length logs, but I don't think the spec rules it out. So we could either allocate a log in the file at this point, or fail. > > + ret = -EINVAL; > > + goto exit; > > + } > > + > > + if (vhdx_log_guid_is_zero(&header->log_guid)) { > > + vhdx_guid_generate(&new_guid); > > + vhdx_update_headers(bs, s, false, &new_guid); > > + } else { > > + /* currently, we require that the log be flushed after > > + * every write. */ > > + ret = -ENOTSUP; > > Can we make an assertion here? > I don't know if we should assert here - the VM could certainly continue on if this is not the primary drive. > > + } > > + > > + /* 0 is an invalid sequence number, but may also represent the first > > + * log write (or a wrapped seq) */ > > + if (s->log.sequence == 0) { > > + s->log.sequence = 1; > > + } > > + > > + sector_offset = offset % VHDX_LOG_SECTOR_SIZE; > > + file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE; > > + > > + aligned_length = length; > > + > > + /* add in the unaligned head and tail bytes */ > > + if (sector_offset) { > > + leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset); > > + leading_length = leading_length > length ? 
length : leading_length; > > + aligned_length -= leading_length; > > + partial_sectors++; > > + } > > + > > + sectors = aligned_length / VHDX_LOG_SECTOR_SIZE; > > + trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE); > > + if (trailing_length) { > > + partial_sectors++; > > + } > > + > > + sectors += partial_sectors; > > + > > + /* sectors is now how many sectors the data itself takes, not > > + * including the header and descriptor metadata */ > > + > > + new_hdr = (VHDXLogEntryHeader) { > > + .signature = VHDX_LOG_SIGNATURE, > > + .tail = s->log.tail, > > + .sequence_number = s->log.sequence, > > + .descriptor_count = sectors, > > + .reserved = 0, > > + .flushed_file_offset = bdrv_getlength(bs->file), > > + .last_file_offset = bdrv_getlength(bs->file), > > + }; > > + > > + memcpy(&new_hdr.log_guid, &header->log_guid, sizeof(MSGUID)); > > + > > + desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count); > > + > > + total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE; > > + new_hdr.entry_length = total_length; > > + > > + vhdx_log_entry_hdr_le_export(&new_hdr); > > + > > + buffer = qemu_blockalign(bs, total_length); > > + memcpy(buffer, &new_hdr, sizeof(new_hdr)); > > + > > + new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr)); > > + data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE); > > + data_tmp = data; > > + > > + /* All log sectors are 4KB, so for any partial sectors we must > > + * merge the data with preexisting data from the final file > > + * destination */ > > + merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); > > + > > + for (i = 0; i < sectors; i++) { > > + new_desc->signature = VHDX_LOG_DESC_SIGNATURE; > > + new_desc->sequence_number = s->log.sequence; > > + new_desc->file_offset = file_offset; > > + > > + if (i == 0 && leading_length) { > > + /* partial sector at the front of the buffer */ > > + ret = bdrv_pread(bs->file, file_offset, merged_sector, > > + VHDX_LOG_SECTOR_SIZE); > > + 
if (ret < 0) { > > + goto exit; > > + } > > + memcpy(merged_sector + sector_offset, data_tmp, > > leading_length); > > + bytes_written = leading_length; > > + sector_write = merged_sector; > > + } else if (i == sectors - 1 && trailing_length) { > > + /* partial sector at the end of the buffer */ > > + ret = bdrv_pread(bs->file, > > + file_offset, > > + merged_sector + trailing_length, > > + VHDX_LOG_SECTOR_SIZE - trailing_length); > > + if (ret < 0) { > > + goto exit; > > + } > > + memcpy(merged_sector, data_tmp, trailing_length); > > + bytes_written = trailing_length; > > + sector_write = merged_sector; > > + } else { > > + bytes_written = VHDX_LOG_SECTOR_SIZE; > > + sector_write = data_tmp; > > + } > > + > > + /* populate the raw sector data into the proper structures, > > + * as well as update the descriptor, and convert to proper > > + * endianness */ > > + vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write, > > + s->log.sequence); > > + > > + data_tmp += bytes_written; > > + data_sector++; > > + new_desc++; > > + file_offset += VHDX_LOG_SECTOR_SIZE; > > + } > > + > > + /* checksum covers entire entry, from the log header through the > > + * last data sector */ > > + vhdx_update_checksum(buffer, total_length, 4); > > + cpu_to_le32s((uint32_t *)(buffer + 4)); > > + > > + /* now write to the log */ > > + vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer, > > + desc_sectors + sectors); > > + if (ret < 0) { > > + goto exit; > > + } > > + > > + if (sectors_written != desc_sectors + sectors) { > > + /* instead of failing, we could flush the log here */ > > + ret = -EINVAL; > > + goto exit; > > + } > > + > > + s->log.sequence++; > > + /* write new tail */ > > + s->log.tail = s->log.write; > > + > > +exit: > > + qemu_vfree(buffer); > > + qemu_vfree(merged_sector); > > + return ret; > > +} > > + > > +/* Perform a log write, and then immediately flush the entire log */ > > +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, > > + void 
*data, uint32_t length, uint64_t offset) > > +{ > > + int ret = 0; > > + VHDXLogSequence logs = { .valid = true, > > + .count = 1, > > + .hdr = { 0 } }; > > + > > + > > + ret = vhdx_log_write(bs, s, data, length, offset); > > + if (ret < 0) { > > + goto exit; > > + } > > + logs.log = s->log; > > + > > + ret = vhdx_log_flush(bs, s, &logs); > > + if (ret < 0) { > > + goto exit; > > + } > > + > > + s->log = logs.log; > > + > > +exit: > > + return ret; > > +} > > + > > diff --git a/block/vhdx.h b/block/vhdx.h > > index 24b126e..b210efc 100644 > > --- a/block/vhdx.h > > +++ b/block/vhdx.h > > @@ -393,6 +393,9 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, > > int crc_offset); > > > > int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s); > > > > +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, > > + void *data, uint32_t length, uint64_t offset); > > + > > static inline void leguid_to_cpus(MSGUID *guid) > > { > > le32_to_cpus(&guid->data1); > > -- > > 1.8.1.4 > > > > > > -- > Fam