On 06/28/2016 10:15 AM, Vladimir Sementsov-Ogievskiy wrote:
> On 03.06.2016 07:32, Fam Zheng wrote:
>> From: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
>>
>> Functions to serialize / deserialize(restore) HBitmap. HBitmap should be
>> saved to linear sequence of bits independently of endianness and bitmap
>> array element (unsigned long) size. Therefore Little Endian is chosen.
>>
>> These functions are appropriate for dirty bitmap migration, restoring
>> the bitmap in several steps is available. To save performance, every
>> step writes only the last level of the bitmap. All other levels are
>> restored by hbitmap_deserialize_finish() as a last step of restoring.
>> So, HBitmap is inconsistent while restoring.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
>> [Fix left shift operand to 1UL; add "finish" parameter. - Fam]
>> Signed-off-by: Fam Zheng <f...@redhat.com>
>> Reviewed-by: Max Reitz <mre...@redhat.com>
>> ---
>>   include/qemu/hbitmap.h |  79 ++++++++++++++++++++++++++++
>>   util/hbitmap.c         | 137
>> +++++++++++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 216 insertions(+)
>>
>> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
>> index f8ed058..26cac7d 100644
>> --- a/include/qemu/hbitmap.h
>> +++ b/include/qemu/hbitmap.h
>> @@ -146,6 +146,85 @@ void hbitmap_reset_all(HBitmap *hb);
>>   bool hbitmap_get(const HBitmap *hb, uint64_t item);
>>     /**
>> + * hbitmap_serialization_granularity:
>> + * @hb: HBitmap to operate on.
>> + *
>> + * Granularity of serialization chunks, used by other serialization
>> functions.
>> + * For every chunk:
>> + * 1. Chunk start should be aligned to this granularity.
>> + * 2. Chunk size should be aligned too, except for last chunk (for which
>> + *      start + count == hb->size)
>> + */
>> +uint64_t hbitmap_serialization_granularity(const HBitmap *hb);
>> +
>> +/**
>> + * hbitmap_serialization_size:
>> + * @hb: HBitmap to operate on.
>> + * @start: Starting bit
>> + * @count: Number of bits
>> + *
>> + * Return number of bytes hbitmap_(de)serialize_part needs
>> + */
>> +uint64_t hbitmap_serialization_size(const HBitmap *hb,
>> +                                    uint64_t start, uint64_t count);
>> +
>> +/**
>> + * hbitmap_serialize_part
>> + * @hb: HBitmap to operate on.
>> + * @buf: Buffer to store serialized bitmap.
>> + * @start: First bit to store.
>> + * @count: Number of bits to store.
>> + *
>> + * Stores HBitmap data corresponding to given region. The format of
>> saved data
>> + * is linear sequence of bits, so it can be used by
>> hbitmap_deserialize_part
>> + * independently of endianness and size of HBitmap level array elements
>> + */
>> +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf,
>> +                            uint64_t start, uint64_t count);
>> +
>> +/**
>> + * hbitmap_deserialize_part
>> + * @hb: HBitmap to operate on.
>> + * @buf: Buffer to restore bitmap data from.
>> + * @start: First bit to restore.
>> + * @count: Number of bits to restore.
>> + * @finish: Whether to call hbitmap_deserialize_finish automatically.
>> + *
>> + * Restores HBitmap data corresponding to given region. The format is
>> the same
>> + * as for hbitmap_serialize_part.
>> + *
>> + * If @finish is false, caller must call hbitmap_serialize_finish
>> before using
>> + * the bitmap.
>> + */
>> +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf,
>> +                              uint64_t start, uint64_t count,
>> +                              bool finish);
>> +
>> +/**
>> + * hbitmap_deserialize_zeroes
>> + * @hb: HBitmap to operate on.
>> + * @start: First bit to restore.
>> + * @count: Number of bits to restore.
>> + * @finish: Whether to call hbitmap_deserialize_finish automatically.
>> + *
>> + * Fills the bitmap with zeroes.
>> + *
>> + * If @finish is false, caller must call hbitmap_serialize_finish
>> before using
>> + * the bitmap.
>> + */
>> +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t
>> count,
>> +                                bool finish);
>> +
>> +/**
>> + * hbitmap_deserialize_finish
>> + * @hb: HBitmap to operate on.
>> + *
>> + * Repair HBitmap after calling hbitmap_deserialize_data. Actually,
>> all HBitmap
>> + * layers are restored here.
>> + */
>> +void hbitmap_deserialize_finish(HBitmap *hb);
>> +
>> +/**
>>    * hbitmap_free:
>>    * @hb: HBitmap to operate on.
>>    *
>> diff --git a/util/hbitmap.c b/util/hbitmap.c
>> index 13c0df5..70dc99b 100644
>> --- a/util/hbitmap.c
>> +++ b/util/hbitmap.c
>> @@ -395,6 +395,143 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item)
>>       return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] &
>> bit) != 0;
>>   }
>>   +uint64_t hbitmap_serialization_granularity(const HBitmap *hb)
>> +{
>> +    /* Require at least 64 bit granularity to be safe on both 64 bit
>> and 32 bit
>> +     * hosts. */
>> +    return 64 << hb->granularity;
>> +}
>> +
>> +/* Start should be aligned to serialization granularity, chunk size
>> should be
>> + * aligned to serialization granularity too, except for last chunk.
>> + */
>> +static void serialization_chunk(const HBitmap *hb,
>> +                                uint64_t start, uint64_t count,
>> +                                unsigned long **first_el, size_t
>> *el_count)
>> +{
>> +    uint64_t last = start + count - 1;
>> +    uint64_t gran = hbitmap_serialization_granularity(hb);
>> +
>> +    assert((start & (gran - 1)) == 0);
>> +    assert((last >> hb->granularity) < hb->size);
>> +    if ((last >> hb->granularity) != hb->size - 1) {
>> +        assert((count & (gran - 1)) == 0);
>> +    }
>> +
>> +    start = (start >> hb->granularity) >> BITS_PER_LEVEL;
>> +    last = (last >> hb->granularity) >> BITS_PER_LEVEL;
>> +
>> +    *first_el = &hb->levels[HBITMAP_LEVELS - 1][start];
>> +    *el_count = last - start + 1;
>> +}
>> +
>> +uint64_t hbitmap_serialization_size(const HBitmap *hb,
>> +                                    uint64_t start, uint64_t count)
>> +{
>> +    uint64_t el_count;
>> +    unsigned long *cur;
>> +
>> +    if (!count) {
>> +        return 0;
>> +    }
>> +    serialization_chunk(hb, start, count, &cur, &el_count);
>> +
>> +    return el_count * sizeof(unsigned long);
>> +}
>> +
>> +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf,
>> +                            uint64_t start, uint64_t count)
>> +{
>> +    uint64_t el_count;
>> +    unsigned long *cur, *end;
>> +
>> +    if (!count) {
>> +        return;
>> +    }
>> +    serialization_chunk(hb, start, count, &cur, &el_count);
>> +    end = cur + el_count;
>> +
> 
> I think it would be better to memcpy the whole part and then loop with
> cpu_to_le64. This loop will go out from compilation for LE case (I
> hope), so in LE case it would be just one memcpy. However in BE case it
> should be better to memcpy large part first and then convert words.

I tried this out on a 64MB chunk of memory on my x86-64 system: both
approaches were roughly the same speed. I couldn't produce a tangible
difference.

LE-vs-BE conversion made a big difference, but even on the version where
we memcpy all-at-once and then perform no-ops, it wasn't noticeably
different or faster than the bunch-of-memcpy approach.

Going to consider this a premature optimization.

>> +    while (cur != end) {
>> +        unsigned long el =
>> +            (BITS_PER_LONG == 32 ? cpu_to_le32(*cur) :
>> cpu_to_le64(*cur));
>> +
>> +        memcpy(buf, &el, sizeof(el));
>> +        buf += sizeof(el);
>> +        cur++;
>> +    }
>> +}
>> +
>> +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf,
>> +                              uint64_t start, uint64_t count,
>> +                              bool finish)
>> +{
>> +    uint64_t el_count;
>> +    unsigned long *cur, *end;
>> +
>> +    if (!count) {
>> +        return;
>> +    }
>> +    serialization_chunk(hb, start, count, &cur, &el_count);
>> +    end = cur + el_count;
>> +
> 
> the same here.
>> +    while (cur != end) {
>> +        memcpy(cur, buf, sizeof(*cur));
>> +
>> +        if (BITS_PER_LONG == 32) {
>> +            le32_to_cpus((uint32_t *)cur);
>> +        } else {
>> +            le64_to_cpus((uint64_t *)cur);
>> +        }
>> +
>> +        buf += sizeof(unsigned long);
>> +        cur++;
>> +    }
>> +    if (finish) {
>> +        hbitmap_deserialize_finish(hb);
>> +    }
>> +}
>> +
>> +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t
>> count,
>> +                                bool finish)
>> +{
>> +    uint64_t el_count;
>> +    unsigned long *first;
>> +
>> +    if (!count) {
>> +        return;
>> +    }
>> +    serialization_chunk(hb, start, count, &first, &el_count);
>> +
>> +    memset(first, 0, el_count * sizeof(unsigned long));
>> +    if (finish) {
>> +        hbitmap_deserialize_finish(hb);
>> +    }
>> +}
>> +
>> +void hbitmap_deserialize_finish(HBitmap *bitmap)
>> +{
>> +    int64_t i, size, prev_size;
>> +    int lev;
>> +
>> +    /* restore levels starting from penultimate to zero level, assuming
>> +     * that the last level is ok */
>> +    size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
>> +    for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) {
>> +        prev_size = size;
>> +        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
>> +        memset(bitmap->levels[lev], 0, size * sizeof(unsigned long));
>> +
>> +        for (i = 0; i < prev_size; ++i) {
>> +            if (bitmap->levels[lev + 1][i]) {
>> +                bitmap->levels[lev][i >> BITS_PER_LEVEL] |=
>> +                    1UL << (i & (BITS_PER_LONG - 1));
>> +            }
>> +        }
>> +    }
>> +
>> +    bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
>> +}
>> +
>>   void hbitmap_free(HBitmap *hb)
>>   {
>>       unsigned i;
> 
> 

Reply via email to