On Wed, 07/24 13:54, Jeff Cody wrote: > This adds support for writing to the VHDX log. > > For spec details, see VHDX Specification Format v1.00: > https://www.microsoft.com/en-us/download/details.aspx?id=34750 > > There are a few limitations to this log support: > 1.) There is no caching yet > 2.) The log is flushed after each entry > > The primary write interface, vhdx_log_write_and_flush(), performs a log > write followed by an immediate flush of the log. > > As each log entry sector is a minimum of 4KB, partial sector writes are > filled in with data from the disk write destination. > > If the current file log GUID is 0, a new GUID is generated and updated > in the header. > > Signed-off-by: Jeff Cody <jc...@redhat.com> > --- > block/vhdx-log.c | 273 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > block/vhdx.h | 3 + > 2 files changed, 276 insertions(+) > > diff --git a/block/vhdx-log.c b/block/vhdx-log.c > index 89b9000..786b393 100644 > --- a/block/vhdx-log.c > +++ b/block/vhdx-log.c > @@ -170,6 +170,53 @@ exit: > return ret; > } > > +/* Writes num_sectors to the log (all log sectors are 4096 bytes), > + * from buffer 'buffer'. Upon return, *sectors_written will contain > + * the number of sectors successfully written. > + * > + * It is assumed that 'buffer' is at least 4096*num_sectors large. 
> + * > + * 0 is returned on success, -errno otherwise */ > +static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log, > + uint32_t *sectors_written, void *buffer, > + uint32_t num_sectors) > +{ > + int ret = 0; > + uint64_t offset; > + uint32_t write; > + void *buffer_tmp; > + BDRVVHDXState *s = bs->opaque; > + > + vhdx_user_visible_write(bs, s); > + > + write = log->write; > + > + buffer_tmp = buffer; > + while (num_sectors) { > + > + offset = log->offset + write; > + write = vhdx_log_inc_idx(write, log->length); > + if (write == log->read) { > + /* full */ > + break; > + } > + ret = bdrv_pwrite_sync(bs->file, offset, buffer_tmp, > + VHDX_LOG_SECTOR_SIZE); > + if (ret < 0) { > + goto exit; > + } > + buffer_tmp += VHDX_LOG_SECTOR_SIZE; > + > + log->write = write; > + *sectors_written = *sectors_written + 1; > + num_sectors--; > + } > + > +exit: > + return ret; > +} > + > + > /* Validates a log entry header */ > static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader > *hdr, > BDRVVHDXState *s) > @@ -732,3 +779,229 @@ exit: > return ret; > } > > + > + > +static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc, > + VHDXLogDataSector *sector, void *data, > + uint64_t seq) > +{ > + memcpy(&desc->leading_bytes, data, 8); > + data += 8; > + cpu_to_le64s(&desc->leading_bytes); > + memcpy(sector->data, data, 4084); > + data += 4084; > + memcpy(&desc->trailing_bytes, data, 4); > + cpu_to_le32s(&desc->trailing_bytes); > + data += 4; > + > + sector->sequence_high = (uint32_t) (seq >> 32); > + sector->sequence_low = (uint32_t) (seq & 0xffffffff); > + sector->data_signature = VHDX_LOG_DATA_SIGNATURE; > + > + vhdx_log_desc_le_export(desc); > + vhdx_log_data_le_export(sector); > +} > + > + > +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, > + void *data, uint32_t length, uint64_t offset) > +{ > + int ret = 0; > + void *buffer = NULL; > + void *merged_sector = NULL; > + void *data_tmp, *sector_write; > + unsigned int 
i; > + int sector_offset; > + uint32_t desc_sectors, sectors, total_length; > + uint32_t sectors_written = 0; > + uint32_t aligned_length; > + uint32_t leading_length = 0; > + uint32_t trailing_length = 0; > + uint32_t partial_sectors = 0; > + uint32_t bytes_written = 0; > + uint64_t file_offset; > + VHDXHeader *header; > + VHDXLogEntryHeader new_hdr; > + VHDXLogDescriptor *new_desc = NULL; > + VHDXLogDataSector *data_sector = NULL; > + MSGUID new_guid = { 0 }; > + > + header = s->headers[s->curr_header]; > + > + /* need to have offset read data, and be on 4096 byte boundary */ > + > + if (length > header->log_length) { > + /* no log present. we could create a log here instead of failing */
Does a newly created VHDX image have allocated log sectors? > + ret = -EINVAL; > + goto exit; > + } > + > + if (vhdx_log_guid_is_zero(&header->log_guid)) { > + vhdx_guid_generate(&new_guid); > + vhdx_update_headers(bs, s, false, &new_guid); > + } else { > + /* currently, we require that the log be flushed after > + * every write. */ > + ret = -ENOTSUP; Can we make an assertion here? > + } > + > + /* 0 is an invalid sequence number, but may also represent the first > + * log write (or a wrapped seq) */ > + if (s->log.sequence == 0) { > + s->log.sequence = 1; > + } > + > + sector_offset = offset % VHDX_LOG_SECTOR_SIZE; > + file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE; > + > + aligned_length = length; > + > + /* add in the unaligned head and tail bytes */ > + if (sector_offset) { > + leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset); > + leading_length = leading_length > length ? length : leading_length; > + aligned_length -= leading_length; > + partial_sectors++; > + } > + > + sectors = aligned_length / VHDX_LOG_SECTOR_SIZE; > + trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE); > + if (trailing_length) { > + partial_sectors++; > + } > + > + sectors += partial_sectors; > + > + /* sectors is now how many sectors the data itself takes, not > + * including the header and descriptor metadata */ > + > + new_hdr = (VHDXLogEntryHeader) { > + .signature = VHDX_LOG_SIGNATURE, > + .tail = s->log.tail, > + .sequence_number = s->log.sequence, > + .descriptor_count = sectors, > + .reserved = 0, > + .flushed_file_offset = bdrv_getlength(bs->file), > + .last_file_offset = bdrv_getlength(bs->file), > + }; > + > + memcpy(&new_hdr.log_guid, &header->log_guid, sizeof(MSGUID)); > + > + desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count); > + > + total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE; > + new_hdr.entry_length = total_length; > + > + vhdx_log_entry_hdr_le_export(&new_hdr); > + > + buffer = qemu_blockalign(bs, 
total_length); > + memcpy(buffer, &new_hdr, sizeof(new_hdr)); > + > + new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr)); > + data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE); > + data_tmp = data; > + > + /* All log sectors are 4KB, so for any partial sectors we must > + * merge the data with preexisting data from the final file > + * destination */ > + merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); > + > + for (i = 0; i < sectors; i++) { > + new_desc->signature = VHDX_LOG_DESC_SIGNATURE; > + new_desc->sequence_number = s->log.sequence; > + new_desc->file_offset = file_offset; > + > + if (i == 0 && leading_length) { > + /* partial sector at the front of the buffer */ > + ret = bdrv_pread(bs->file, file_offset, merged_sector, > + VHDX_LOG_SECTOR_SIZE); > + if (ret < 0) { > + goto exit; > + } > + memcpy(merged_sector + sector_offset, data_tmp, leading_length); > + bytes_written = leading_length; > + sector_write = merged_sector; > + } else if (i == sectors - 1 && trailing_length) { > + /* partial sector at the end of the buffer */ > + ret = bdrv_pread(bs->file, > + file_offset, > + merged_sector + trailing_length, > + VHDX_LOG_SECTOR_SIZE - trailing_length); > + if (ret < 0) { > + goto exit; > + } > + memcpy(merged_sector, data_tmp, trailing_length); > + bytes_written = trailing_length; > + sector_write = merged_sector; > + } else { > + bytes_written = VHDX_LOG_SECTOR_SIZE; > + sector_write = data_tmp; > + } > + > + /* populate the raw sector data into the proper structures, > + * as well as update the descriptor, and convert to proper > + * endianness */ > + vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write, > + s->log.sequence); > + > + data_tmp += bytes_written; > + data_sector++; > + new_desc++; > + file_offset += VHDX_LOG_SECTOR_SIZE; > + } > + > + /* checksum covers entire entry, from the log header through the > + * last data sector */ > + vhdx_update_checksum(buffer, total_length, 4); > + cpu_to_le32s((uint32_t 
*)(buffer + 4)); > + > + /* now write to the log */ > + vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer, > + desc_sectors + sectors); > + if (ret < 0) { > + goto exit; > + } > + > + if (sectors_written != desc_sectors + sectors) { > + /* instead of failing, we could flush the log here */ > + ret = -EINVAL; > + goto exit; > + } > + > + s->log.sequence++; > + /* write new tail */ > + s->log.tail = s->log.write; > + > +exit: > + qemu_vfree(buffer); > + qemu_vfree(merged_sector); > + return ret; > +} > + > +/* Perform a log write, and then immediately flush the entire log */ > +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, > + void *data, uint32_t length, uint64_t offset) > +{ > + int ret = 0; > + VHDXLogSequence logs = { .valid = true, > + .count = 1, > + .hdr = { 0 } }; > + > + > + ret = vhdx_log_write(bs, s, data, length, offset); > + if (ret < 0) { > + goto exit; > + } > + logs.log = s->log; > + > + ret = vhdx_log_flush(bs, s, &logs); > + if (ret < 0) { > + goto exit; > + } > + > + s->log = logs.log; > + > +exit: > + return ret; > +} > + > diff --git a/block/vhdx.h b/block/vhdx.h > index 24b126e..b210efc 100644 > --- a/block/vhdx.h > +++ b/block/vhdx.h > @@ -393,6 +393,9 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, > int crc_offset); > > int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s); > > +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, > + void *data, uint32_t length, uint64_t offset); > + > static inline void leguid_to_cpus(MSGUID *guid) > { > le32_to_cpus(&guid->data1); > -- > 1.8.1.4 > > -- Fam