On 06/09/2013 12:52 PM, Benjamin Herrenschmidt wrote:
> On Sun, 2013-06-09 at 12:34 +1000, Alexey Kardashevskiy wrote:
>> It is _live_ migration, the source sends changes, same pages can change and
>> be sent several times. So we would need to turn tracking on on the
>> destination to know if some page was received from the source or changed by
>> the destination itself (by writing there bios/firmware images, etc) and
>> then clear pages which were touched by the destination and were not sent by
>> the source.
> Or we can set some kind of flag so that when creating a "migration
> target" VM we don't load all these things into memory.

How would we do that? The platform initialization code does not have a clue
whether it is going to receive a migrated host or not.

>> Or we do not make guesses, the source sends everything and the destination
>> simply checks if a page which is empty on the source is empty on the
>> destination and avoids writing zeroes to it. Looks simpler to me and this is
>> what the new patch does.
> But you end up sending a lot of zeros ... is the migration compressed
> (I am not familiar with it at all)? If it is, that shouldn't be a big
> deal, but else it feels to me that you should be able to send a special
> packet instead that says "all zeros" because you'll potentially have an
> awful lot of these.

It is compressed exactly as you described.

> Ben.
>>>>> Also, you mean the following code is from qemu and it does not allocate
>>>>> memory with your gcc, right? Maybe it is related to KVM; how about
>>>>> turning off KVM and retrying the following code in qemu?
>>>>>> #include <stdio.h>
>>>>>> #include <stdlib.h>
>>>>>> #include <assert.h>
>>>>>> #include <unistd.h>
>>>>>> #include <sys/resource.h>
>>>>>> #include <inttypes.h>
>>>>>> #include <string.h>
>>>>>> #include <sys/mman.h>
>>>>>> #include <errno.h>
>>>>>> #if defined __SSE2__
>>>>>> #include <emmintrin.h>
>>>>>> #define VECTYPE        __m128i
>>>>>> #define SPLAT(p)       _mm_set1_epi8(*(p))
>>>>>> #define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) ==
>>>>>> 0xFFFF)
>>>>>> #else
>>>>>> #define VECTYPE        unsigned long
>>>>>> #define SPLAT(p)       (*(p) * (~0UL / 255))
>>>>>> #define ALL_EQ(v1, v2) ((v1) == (v2))
>>>>>> #endif
>>>>>> /* Round number down to multiple */
>>>>>> #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))
>>>>>> /* Round number up to multiple */
>>>>>> #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))
>>>>>> #define QEMU_VMALLOC_ALIGN (256 * 4096)
>>>>>> /* alloc shared memory pages */
>>>>>> void *qemu_anon_ram_alloc(size_t size)
>>>>>> {
>>>>>>       size_t align = QEMU_VMALLOC_ALIGN;
>>>>>>       size_t total = size + align - getpagesize();
>>>>>>       void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
>>>>>>                        MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
>>>>>>       size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) -
>>>>>> (uintptr_t)ptr;
>>>>>>       if (ptr == MAP_FAILED) {
>>>>>>           fprintf(stderr, "Failed to allocate %zu B: %s\n",
>>>>>>                   size, strerror(errno));
>>>>>>           abort();
>>>>>>       }
>>>>>>       ptr += offset;
>>>>>>       total -= offset;
>>>>>>       if (offset > 0) {
>>>>>>           munmap(ptr - offset, offset);
>>>>>>       }
>>>>>>       if (total > size) {
>>>>>>           munmap(ptr + size, total - size);
>>>>>>       }
>>>>>>       return ptr;
>>>>>> }
>>>>>> static inline int
>>>>>> can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
>>>>>> {
>>>>>>                      * sizeof(VECTYPE)) == 0
>>>>>>               && ((uintptr_t) buf) % sizeof(VECTYPE) == 0);
>>>>>> }
>>>>>> size_t buffer_find_nonzero_offset(const void *buf, size_t len)
>>>>>> {
>>>>>>       const VECTYPE *p = buf;
>>>>>>       const VECTYPE zero = (VECTYPE){0};
>>>>>>       size_t i;
>>>>>>       if (!len) {
>>>>>>           return 0;
>>>>>>       }
>>>>>>       assert(can_use_buffer_find_nonzero_offset(buf, len));
>>>>>>       for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
>>>>>>           if (!ALL_EQ(p[i], zero)) {
>>>>>>               return i * sizeof(VECTYPE);
>>>>>>           }
>>>>>>       }
>>>>>>            i < len / sizeof(VECTYPE);
>>>>>>           VECTYPE tmp0 = p[i + 0] | p[i + 1];
>>>>>>           VECTYPE tmp1 = p[i + 2] | p[i + 3];
>>>>>>           VECTYPE tmp2 = p[i + 4] | p[i + 5];
>>>>>>           VECTYPE tmp3 = p[i + 6] | p[i + 7];
>>>>>>           VECTYPE tmp01 = tmp0 | tmp1;
>>>>>>           VECTYPE tmp23 = tmp2 | tmp3;
>>>>>>           if (!ALL_EQ(tmp01 | tmp23, zero)) {
>>>>>>               break;
>>>>>>           }
>>>>>>       }
>>>>>>       return i * sizeof(VECTYPE);
>>>>>> }
>>>>>> int main()
>>>>>> {
>>>>>>        //char *x = malloc(1024 << 20);
>>>>>>        char *x = qemu_anon_ram_alloc(1024 << 20);
>>>>>>        int i, j;
>>>>>>        int ret = 0;
>>>>>>        struct rusage rusage;
>>>>>>        for (i = 0; i < 500; i ++) {
>>>>>>            for (j = 0; j < 10 << 20; j += 4096) {
>>>>>>                 ret += buffer_find_nonzero_offset((char*) (x + (i << 20)
>>>>>> + j), 4096);
>>>>>>            }
>>>>>>            getrusage( RUSAGE_SELF, &rusage );
>>>>>>            printf("read offset: %d kB, RSS size: %ld kB", ((i+1) << 10),
>>>>>> rusage.ru_maxrss);
>>>>>>            getchar();
>>>>>>        }
>>>>>>        printf("%d zero pages\n", ret);
>>>>>> }


Reply via email to