On 06/28/2016 10:15 AM, Vladimir Sementsov-Ogievskiy wrote: > On 03.06.2016 07:32, Fam Zheng wrote: >> From: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com> >> >> Functions to serialize / deserialize(restore) HBitmap. HBitmap should be >> saved to linear sequence of bits independently of endianness and bitmap >> array element (unsigned long) size. Therefore Little Endian is chosen. >> >> These functions are appropriate for dirty bitmap migration, restoring >> the bitmap in several steps is available. To save performance, every >> step writes only the last level of the bitmap. All other levels are >> restored by hbitmap_deserialize_finish() as a last step of restoring. >> So, HBitmap is inconsistent while restoring. >> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com> >> [Fix left shift operand to 1UL; add "finish" parameter. - Fam] >> Signed-off-by: Fam Zheng <f...@redhat.com> >> Reviewed-by: Max Reitz <mre...@redhat.com> >> --- >> include/qemu/hbitmap.h | 79 ++++++++++++++++++++++++++++ >> util/hbitmap.c | 137 >> +++++++++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 216 insertions(+) >> >> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h >> index f8ed058..26cac7d 100644 >> --- a/include/qemu/hbitmap.h >> +++ b/include/qemu/hbitmap.h >> @@ -146,6 +146,85 @@ void hbitmap_reset_all(HBitmap *hb); >> bool hbitmap_get(const HBitmap *hb, uint64_t item); >> /** >> + * hbitmap_serialization_granularity: >> + * @hb: HBitmap to operate on. >> + * >> + * Granularity of serialization chunks, used by other serialization >> functions. >> + * For every chunk: >> + * 1. Chunk start should be aligned to this granularity. >> + * 2. Chunk size should be aligned too, except for last chunk (for which >> + * start + count == hb->size) >> + */ >> +uint64_t hbitmap_serialization_granularity(const HBitmap *hb); >> + >> +/** >> + * hbitmap_serialization_size: >> + * @hb: HBitmap to operate on. >> + * @start: Starting bit >> + * @count: Number of bits >> + * >> + * Return number of bytes hbitmap_(de)serialize_part needs >> + */ >> +uint64_t hbitmap_serialization_size(const HBitmap *hb, >> + uint64_t start, uint64_t count); >> + >> +/** >> + * hbitmap_serialize_part >> + * @hb: HBitmap to operate on. >> + * @buf: Buffer to store serialized bitmap. >> + * @start: First bit to store. >> + * @count: Number of bits to store. >> + * >> + * Stores HBitmap data corresponding to given region. The format of >> saved data >> + * is linear sequence of bits, so it can be used by >> hbitmap_deserialize_part >> + * independently of endianness and size of HBitmap level array elements >> + */ >> +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf, >> + uint64_t start, uint64_t count); >> + >> +/** >> + * hbitmap_deserialize_part >> + * @hb: HBitmap to operate on. >> + * @buf: Buffer to restore bitmap data from. >> + * @start: First bit to restore. >> + * @count: Number of bits to restore. >> + * @finish: Whether to call hbitmap_deserialize_finish automatically. >> + * >> + * Restores HBitmap data corresponding to given region. The format is >> the same >> + * as for hbitmap_serialize_part. >> + * >> + * If @finish is false, caller must call hbitmap_serialize_finish >> before using >> + * the bitmap. >> + */ >> +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf, >> + uint64_t start, uint64_t count, >> + bool finish); >> + >> +/** >> + * hbitmap_deserialize_zeroes >> + * @hb: HBitmap to operate on. >> + * @start: First bit to restore. >> + * @count: Number of bits to restore. >> + * @finish: Whether to call hbitmap_deserialize_finish automatically. >> + * >> + * Fills the bitmap with zeroes. >> + * >> + * If @finish is false, caller must call hbitmap_serialize_finish >> before using >> + * the bitmap. >> + */ >> +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t >> count, >> + bool finish); >> + >> +/** >> + * hbitmap_deserialize_finish >> + * @hb: HBitmap to operate on. >> + * >> + * Repair HBitmap after calling hbitmap_deserialize_data. Actually, >> all HBitmap >> + * layers are restored here. >> + */ >> +void hbitmap_deserialize_finish(HBitmap *hb); >> + >> +/** >> * hbitmap_free: >> * @hb: HBitmap to operate on. >> * >> diff --git a/util/hbitmap.c b/util/hbitmap.c >> index 13c0df5..70dc99b 100644 >> --- a/util/hbitmap.c >> +++ b/util/hbitmap.c >> @@ -395,6 +395,143 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item) >> return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & >> bit) != 0; >> } >> +uint64_t hbitmap_serialization_granularity(const HBitmap *hb) >> +{ >> + /* Require at least 64 bit granularity to be safe on both 64 bit >> and 32 bit >> + * hosts. */ >> + return 64 << hb->granularity; >> +} >> + >> +/* Start should be aligned to serialization granularity, chunk size >> should be >> + * aligned to serialization granularity too, except for last chunk. >> + */ >> +static void serialization_chunk(const HBitmap *hb, >> + uint64_t start, uint64_t count, >> + unsigned long **first_el, size_t >> *el_count) >> +{ >> + uint64_t last = start + count - 1; >> + uint64_t gran = hbitmap_serialization_granularity(hb); >> + >> + assert((start & (gran - 1)) == 0); >> + assert((last >> hb->granularity) < hb->size); >> + if ((last >> hb->granularity) != hb->size - 1) { >> + assert((count & (gran - 1)) == 0); >> + } >> + >> + start = (start >> hb->granularity) >> BITS_PER_LEVEL; >> + last = (last >> hb->granularity) >> BITS_PER_LEVEL; >> + >> + *first_el = &hb->levels[HBITMAP_LEVELS - 1][start]; >> + *el_count = last - start + 1; >> +} >> + >> +uint64_t hbitmap_serialization_size(const HBitmap *hb, >> + uint64_t start, uint64_t count) >> +{ >> + uint64_t el_count; >> + unsigned long *cur; >> + >> + if (!count) { >> + return 0; >> + } >> + serialization_chunk(hb, start, count, &cur, &el_count); >> + >> + return el_count * sizeof(unsigned long); >> +} >> + >> +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf, >> + uint64_t start, uint64_t count) >> +{ >> + uint64_t el_count; >> + unsigned long *cur, *end; >> + >> + if (!count) { >> + return; >> + } >> + serialization_chunk(hb, start, count, &cur, &el_count); >> + end = cur + el_count; >> + > > I think it would be better to memcpy the whole part and then loop with > cpu_to_le64. This loop will go out from compilation for LE case (I > hope), so in LE case it would be just one memcpy. However in BE case it > should be better to memcpy large part first and then convert words.
I tried this out on a 64MB chunk of memory on my x86-64 system: both approaches were roughly the same speed. I couldn't produce a tangible difference. LE-vs-BE conversion made a big difference, but even on the version where we memcpy all-at-once and then perform no-ops, it wasn't noticeably different or faster than the bunch-of-memcpy approach. Going to consider this a premature optimization. >> + while (cur != end) { >> + unsigned long el = >> + (BITS_PER_LONG == 32 ? cpu_to_le32(*cur) : >> cpu_to_le64(*cur)); >> + >> + memcpy(buf, &el, sizeof(el)); >> + buf += sizeof(el); >> + cur++; >> + } >> +} >> + >> +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf, >> + uint64_t start, uint64_t count, >> + bool finish) >> +{ >> + uint64_t el_count; >> + unsigned long *cur, *end; >> + >> + if (!count) { >> + return; >> + } >> + serialization_chunk(hb, start, count, &cur, &el_count); >> + end = cur + el_count; >> + > > the same here. >> + while (cur != end) { >> + memcpy(cur, buf, sizeof(*cur)); >> + >> + if (BITS_PER_LONG == 32) { >> + le32_to_cpus((uint32_t *)cur); >> + } else { >> + le64_to_cpus((uint64_t *)cur); >> + } >> + >> + buf += sizeof(unsigned long); >> + cur++; >> + } >> + if (finish) { >> + hbitmap_deserialize_finish(hb); >> + } >> +} >> + >> +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t >> count, >> + bool finish) >> +{ >> + uint64_t el_count; >> + unsigned long *first; >> + >> + if (!count) { >> + return; >> + } >> + serialization_chunk(hb, start, count, &first, &el_count); >> + >> + memset(first, 0, el_count * sizeof(unsigned long)); >> + if (finish) { >> + hbitmap_deserialize_finish(hb); >> + } >> +} >> + >> +void hbitmap_deserialize_finish(HBitmap *bitmap) >> +{ >> + int64_t i, size, prev_size; >> + int lev; >> + >> + /* restore levels starting from penultimate to zero level, assuming >> + * that the last level is ok */ >> + size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); >> + for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) { >> + prev_size = size; >> + size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); >> + memset(bitmap->levels[lev], 0, size * sizeof(unsigned long)); >> + >> + for (i = 0; i < prev_size; ++i) { >> + if (bitmap->levels[lev + 1][i]) { >> + bitmap->levels[lev][i >> BITS_PER_LEVEL] |= >> + 1UL << (i & (BITS_PER_LONG - 1)); >> + } >> + } >> + } >> + >> + bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1); >> +} >> + >> void hbitmap_free(HBitmap *hb) >> { >> unsigned i; > >