Il 20/08/2013 20:34, Charlie Shepherd ha scritto: > cow_co_is_allocated and cow_update_bitmap set bits by reading the relevant > word, setting the specific bit in it and writing it back. These functions set > a number of contiguous bits however, so this is an extremely inefficient way > of doing this. This patch converts them to read the whole bitmap they need in > one go, update it and then write it out, which should be much more more > efficient. > > This patch has a startling effect on the speed reads and writes, as measured > by a simple benchmark. This performance increase could likely still > be improved quite easily. > > Before: > $ qemu-io -c 'write 0 1M' test.cow > Average over 3 runs: 34Kb/s > > $ qemu-io -c 'read 0 1M' test.cow > Average over 3 runs:28Mb/s > > $ qemu-io -c 'read 0 100M' test.cow > Average over 3 runs:28Mb/s > > After: > $ qemu-io -c 'write 0 1M' test.cow > Average over 3 runs: 10 Mb/s > > $ qemu-io -c 'read 0 1M' test.cow > Average over 3 runs: 351 Mb/s > > $ qemu-io -c 'read 0 100M' test.cow > Average over 3 runs: 426 Mb/s > > Which is a ~300x increase in write speed and a ~10x increase in read speed. > > Signed-off-by: Charlie Shepherd <char...@ctshepherd.com> > --- > block/cow.c | 119 > ++++++++++++++++++++++++++++++++++-------------------------- > 1 file changed, 68 insertions(+), 51 deletions(-) > > diff --git a/block/cow.c b/block/cow.c > index 1cc2e89..cd411ec 100644 > --- a/block/cow.c > +++ b/block/cow.c > @@ -102,42 +102,10 @@ static int cow_open(BlockDriverState *bs, QDict > *options, int flags) > return ret; > } > > -/* > - * XXX(hch): right now these functions are extremely inefficient. > - * We should just read the whole bitmap we'll need in one go instead. > - */ > -static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum) > -{ > - uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8; > - uint8_t bitmap; > - int ret; > - > - ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)); > - if (ret < 0) { > - return ret; > - } > - > - bitmap |= (1 << (bitnum % 8)); > - > - ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap)); > - if (ret < 0) { > - return ret; > - } > - return 0; > -} > - > -static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum) > +/* Cannot use bitmap.c on big-endian machines. */ > +static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap) > { > - uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8; > - uint8_t bitmap; > - int ret; > - > - ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)); > - if (ret < 0) { > - return ret; > - } > - > - return !!(bitmap & (1 << (bitnum % 8))); > + return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0; > } > > /* Return true if first block has been changed (ie. current version is > @@ -146,40 +114,82 @@ static inline int is_bit_set(BlockDriverState *bs, > int64_t bitnum) > static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs, > int64_t sector_num, int nb_sectors, int *num_same) > { > - int changed; > + int ret, changed; > + uint64_t offset = sizeof(struct cow_header_v2) + sector_num / 8; > + > + int init_bits = (sector_num % 8) ? (8 - (sector_num % 8)) : 0; > + int remaining = sector_num - init_bits; > + int full_bytes = remaining / 8; > + int trail = remaining % 8; > + > + int len = !!init_bits + full_bytes + !!trail; > + uint8_t bitmap[len];
This is a basically unbounded allocation on the stack. You should split this in smaller ranges using the "num_same" argument, which is what I did in my patch. > if (nb_sectors == 0) { > - *num_same = nb_sectors; > - return 0; > + *num_same = nb_sectors; > + return 0; > } > > - changed = is_bit_set(bs, sector_num); > - if (changed < 0) { > - return 0; /* XXX: how to return I/O errors? */ > + ret = bdrv_pread(bs->file, offset, bitmap, len); > + if (ret < 0) { > + return ret; > } > > + changed = cow_test_bit(sector_num, bitmap); > for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) { > - if (is_bit_set(bs, sector_num + *num_same) != changed) > - break; > + if (cow_test_bit(sector_num + *num_same, bitmap) != changed) { > + break; > + } > } > > return changed; > } > > +/* Set the bits from sector_num to sector_num + nb_sectors in the bitmap of > + * bs->file. */ > static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num, > int nb_sectors) > { > - int error = 0; > - int i; > + int ret; > + uint64_t offset = sizeof(struct cow_header_v2) + sector_num / 8; > > - for (i = 0; i < nb_sectors; i++) { > - error = cow_set_bit(bs, sector_num + i); > - if (error) { > - break; > - } > + int init_bits = (sector_num % 8) ? (8 - (sector_num % 8)) : 0; > + int remaining = sector_num - init_bits; > + int full_bytes = remaining / 8; > + int trail = remaining % 8; > + > + int len = !!init_bits + full_bytes + !!trail; > + uint8_t buf[len]; Here your patch has indeed an improvement over mine. However, this is another basically unbounded allocation on the stack. You should split bitmap updates in smaller parts (doing 512-byte aligned writes is fine, each covers 2MB in the file and writes this big are very rare!). > + ret = bdrv_pread(bs->file, offset, buf, len); > + if (ret < 0) { > + return ret; > + } > + > + /* Do sector_num -> nearest byte boundary */ > + if (init_bits) { > + /* This sets the highest init_bits bits in the byte */ > + uint8_t bits = ((1 << init_bits) - 1) << (8 - init_bits); > + buf[0] |= bits; > + } > + > + if (full_bytes) { > + memset(&buf[!!init_bits], ~0, full_bytes); > + } > + > + /* Set the trailing bits in the final byte */ > + if (trail) { > + /* This sets the lowest trail bits in the byte */ > + uint8_t bits = (1 << trail) - 1; > + buf[len - 1] |= bits; > + } ... and you should also check if there is a change in the bits, and skip the flush if there is no change. Flushing a multi-megabyte write is very expensive. It basically makes format=cow as slow as format=raw,cache=writethrough. > + ret = bdrv_pwrite(bs->file, offset, buf, len); > + if (ret < 0) { > + return ret; > } > > - return error; > + return 0; > } > > static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num, > @@ -237,6 +247,13 @@ static int cow_write(BlockDriverState *bs, int64_t > sector_num, > return ret; > } > > + /* We need to flush the data before writing the metadata so that there is > + * no chance of metadata referring to data that doesn't exist. */ > + ret = bdrv_flush(bs->file); > + if (ret < 0) { > + return ret; > + } See above about this flush. Paolo > return cow_update_bitmap(bs, sector_num, nb_sectors); > } > >