[Qemu-devel] [PATCH v2 0/6] postcopy block time calculation + ppc32 build fix
V1->V2: __nocheck accidentally appeared after rebase; this patch set was also rebased after the latest pull request. This patch set includes patches which were reverted by commit ee86981bd, due to build problem on 32 powerpc/arm architecture. Also it includes patch to fix build ([PATCH v4] migration: change blocktime type to uint32_t), but that patch was merged into: migration: add postcopy blocktime ctx into MigrationIncomingState migration: calculate vCPU blocktime on dst side migration: add postcopy total blocktime into query-migrate based on commit c6740fc88ecd8f5cf3cf3185ee112c3eea41caa2 "hw/rdma: Implementation of PVRDMA device" Alexey Perevalov (6): migration: introduce postcopy-blocktime capability migration: add postcopy blocktime ctx into MigrationIncomingState migration: calculate vCPU blocktime on dst side migration: postcopy_blocktime documentation migration: add blocktime calculation into migration-test migration: add postcopy total blocktime into query-migrate docs/devel/migration.rst | 14 +++ hmp.c| 15 +++ migration/migration.c| 51 - migration/migration.h| 13 +++ migration/postcopy-ram.c | 268 ++- migration/trace-events | 6 +- qapi/migration.json | 17 ++- tests/migration-test.c | 16 +++ 8 files changed, 392 insertions(+), 8 deletions(-) -- 2.7.4
[Qemu-devel] [PATCH v2 2/6] migration: add postcopy blocktime ctx into MigrationIncomingState
This patch adds request to kernel space for UFFD_FEATURE_THREAD_ID, in case this feature is provided by kernel. PostcopyBlocktimeContext is encapsulated inside postcopy-ram.c, due to it being a postcopy-only feature. Also it defines PostcopyBlocktimeContext's instance live time. Information from PostcopyBlocktimeContext instance will be provided much after postcopy migration end, instance of PostcopyBlocktimeContext will live till QEMU exit, but part of it (vcpu_addr, page_fault_vcpu_time) used only during calculation, will be released when postcopy ended or failed. To enable postcopy blocktime calculation on destination, need to request proper compatibility (Patch for documentation will be at the tail of the patch set). As an example following command enable that capability, assume QEMU was started with -chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock option to control it [root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \ {\"execute\": \"migrate-set-capabilities\" , \"arguments\": { \"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\": true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock Or just with HMP (qemu) migrate_set_capability postcopy-blocktime on Reviewed-by: Dr. 
David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- migration/migration.h| 8 +++ migration/postcopy-ram.c | 61 2 files changed, 69 insertions(+) diff --git a/migration/migration.h b/migration/migration.h index 46a50bc..6d9aaeb 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -22,6 +22,8 @@ #include "hw/qdev.h" #include "io/channel.h" +struct PostcopyBlocktimeContext; + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *from_src_file; @@ -65,6 +67,12 @@ struct MigrationIncomingState { /* The coroutine we should enter (back) after failover */ Coroutine *migration_incoming_co; QemuSemaphore colo_incoming_sem; + +/* + * PostcopyBlocktimeContext to keep information for postcopy + * live migration, to calculate vCPU block time + * */ +struct PostcopyBlocktimeContext *blocktime_ctx; }; MigrationIncomingState *migration_incoming_get_current(void); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index efd7793..66f1df9 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -90,6 +90,54 @@ int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp) #include #include +typedef struct PostcopyBlocktimeContext { +/* time when page fault initiated per vCPU */ +uint32_t *page_fault_vcpu_time; +/* page address per vCPU */ +uintptr_t *vcpu_addr; +uint32_t total_blocktime; +/* blocktime per vCPU */ +uint32_t *vcpu_blocktime; +/* point in time when last page fault was initiated */ +uint32_t last_begin; +/* number of vCPU are suspended */ +int smp_cpus_down; +uint64_t start_time; + +/* + * Handler for exit event, necessary for + * releasing whole blocktime_ctx + */ +Notifier exit_notifier; +} PostcopyBlocktimeContext; + +static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx) +{ +g_free(ctx->page_fault_vcpu_time); 
+g_free(ctx->vcpu_addr); +g_free(ctx->vcpu_blocktime); +g_free(ctx); +} + +static void migration_exit_cb(Notifier *n, void *data) +{ +PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, + exit_notifier); +destroy_blocktime_context(ctx); +} + +static struct PostcopyBlocktimeContext *blocktime_context_new(void) +{ +PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); +ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); +ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); + +ctx->exit_notifier.notify = migration_exit_cb; +ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +qemu_add_exit_notifier(&ctx->exit_notifier); +return ctx; +} /** * receive_ufd_features: check userfault fd features, to request only supported @@ -182,6 +230,19 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) } } +#ifdef UFFD_FEATURE_THREAD_ID +if (migrate_postcopy_blocktime() && mis && +UFFD_FEATURE_THREAD_ID & supported_features) { +/* kernel supports that feature */ +/* don't create blocktime_context if it exists */ +if (!mis->blocktime_ctx) { +mis->blocktime_ctx = blocktime_context_new(); +} +
[Qemu-devel] [PATCH v2 6/6] migration: add postcopy total blocktime into query-migrate
Postcopy total blocktime is available on destination side only. But query-migrate was possible only for source. This patch adds ability to call query-migrate on destination. To be able to see postcopy blocktime, need to request postcopy-blocktime capability. The query-migrate command will show following sample result: {"return": "postcopy-vcpu-blocktime": [115, 100], "status": "completed", "postcopy-blocktime": 100 }} postcopy_vcpu_blocktime contains list, where the first item is the first vCPU in QEMU. This patch has a drawback, it combines states of incoming and outgoing migration. Ongoing migration state will overwrite incoming state. Looks like better to separate query-migrate for incoming and outgoing migration or add parameter to indicate type of migration. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- hmp.c| 15 + migration/migration.c| 42 migration/migration.h| 4 migration/postcopy-ram.c | 56 migration/trace-events | 1 + qapi/migration.json | 11 +- 6 files changed, 124 insertions(+), 5 deletions(-) diff --git a/hmp.c b/hmp.c index 679467d..6c51df5 100644 --- a/hmp.c +++ b/hmp.c @@ -274,6 +274,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->cpu_throttle_percentage); } +if (info->has_postcopy_blocktime) { +monitor_printf(mon, "postcopy blocktime: %u\n", + info->postcopy_blocktime); +} + +if (info->has_postcopy_vcpu_blocktime) { +Visitor *v; +char *str; +v = string_output_visitor_new(false, ); +visit_type_uint32List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_complete(v, ); +monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); +g_free(str); +visit_free(v); +} qapi_free_MigrationInfo(info); qapi_free_MigrationCapabilityStatusList(caps); } diff --git a/migration/migration.c b/migration/migration.c index f95a7f3..71b0f19 100644 --- a/migration/migration.c +++ 
b/migration/migration.c @@ -630,14 +630,15 @@ static void populate_disk_info(MigrationInfo *info) } } -MigrationInfo *qmp_query_migrate(Error **errp) +static void fill_source_migration_info(MigrationInfo *info) { -MigrationInfo *info = g_malloc0(sizeof(*info)); MigrationState *s = migrate_get_current(); switch (s->state) { case MIGRATION_STATUS_NONE: /* no migration has happened ever */ +/* do not overwrite destination migration status */ +return; break; case MIGRATION_STATUS_SETUP: info->has_status = true; @@ -688,8 +689,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) break; } info->status = s->state; - -return info; } /** @@ -753,6 +752,41 @@ static bool migrate_caps_check(bool *cap_list, return true; } +static void fill_destination_migration_info(MigrationInfo *info) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); + +switch (mis->state) { +case MIGRATION_STATUS_NONE: +return; +break; +case MIGRATION_STATUS_SETUP: +case MIGRATION_STATUS_CANCELLING: +case MIGRATION_STATUS_CANCELLED: +case MIGRATION_STATUS_ACTIVE: +case MIGRATION_STATUS_POSTCOPY_ACTIVE: +case MIGRATION_STATUS_FAILED: +case MIGRATION_STATUS_COLO: +info->has_status = true; +break; +case MIGRATION_STATUS_COMPLETED: +info->has_status = true; +fill_destination_postcopy_migration_info(info); +break; +} +info->status = mis->state; +} + +MigrationInfo *qmp_query_migrate(Error **errp) +{ +MigrationInfo *info = g_malloc0(sizeof(*info)); + +fill_destination_migration_info(info); +fill_source_migration_info(info); + +return info; +} + void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, Error **errp) { diff --git a/migration/migration.h b/migration/migration.h index 6d9aaeb..7c69598 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -77,6 +77,10 @@ struct MigrationIncomingState { MigrationIncomingState *migration_incoming_get_current(void); void migration_incoming_state_destroy(void); +/* + * Functions to work with blocktime context + */ +void 
fill_destination_postcopy_migration_info(MigrationInfo *info); #define TYPE_MIGRATION "migration" diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 6b01884..bbc1a95 100644 --- a/migration/postcopy-ram.c +++ b/migration
[Qemu-devel] [PATCH v2 1/6] migration: introduce postcopy-blocktime capability
Right now it could be used on destination side to enable vCPU blocktime calculation for postcopy live migration. vCPU blocktime - it's time since vCPU thread was put into interruptible sleep, till memory page was copied and thread awake. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- migration/migration.c | 9 + migration/migration.h | 1 + qapi/migration.json | 6 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index fc629e5..f95a7f3 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1540,6 +1540,15 @@ bool migrate_zero_blocks(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; } +bool migrate_postcopy_blocktime(void) +{ +MigrationState *s; + +s = migrate_get_current(); + +return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; +} + bool migrate_use_compression(void) { MigrationState *s; diff --git a/migration/migration.h b/migration/migration.h index 8d2f320..46a50bc 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -230,6 +230,7 @@ int migrate_compress_level(void); int migrate_compress_threads(void); int migrate_decompress_threads(void); bool migrate_use_events(void); +bool migrate_postcopy_blocktime(void); /* Sending on the return path - generic and then for each message type */ void migrate_send_rp_shut(MigrationIncomingState *mis, diff --git a/qapi/migration.json b/qapi/migration.json index 9d0bf82..24bfc19 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -354,16 +354,20 @@ # # @x-multifd: Use more than one fd for migration (since 2.11) # +# # @dirty-bitmaps: If enabled, QEMU will migrate named dirty bitmaps. 
# (since 2.12) # +# @postcopy-blocktime: Calculate downtime for postcopy live migration +# (since 2.13) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', 'block', 'return-path', 'pause-before-switchover', 'x-multifd', - 'dirty-bitmaps' ] } + 'dirty-bitmaps', 'postcopy-blocktime' ] } ## # @MigrationCapabilityStatus: -- 2.7.4
[Qemu-devel] [PATCH v2 4/6] migration: postcopy_blocktime documentation
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- docs/devel/migration.rst | 14 ++ 1 file changed, 14 insertions(+) diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index e32b087..9342a8a 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -401,6 +401,20 @@ will now cause the transition from precopy to postcopy. It can be issued immediately after migration is started or any time later on. Issuing it after the end of a migration is harmless. +Blocktime is a postcopy live migration metric, intended to show how +long the vCPU was in state of interruptable sleep due to pagefault. +That metric is calculated both for all vCPUs as overlapped value, and +separately for each vCPU. These values are calculated on destination +side. To enable postcopy blocktime calculation, enter following +command on destination monitor: + +``migrate_set_capability postcopy-blocktime on`` + +Postcopy blocktime can be retrieved by query-migrate qmp command. +postcopy-blocktime value of qmp command will show overlapped blocking +time for all vCPU, postcopy-vcpu-blocktime will show list of blocking +time per vCPU. + .. note:: During the postcopy phase, the bandwidth limits set using ``migrate_set_speed`` is ignored (to avoid delaying requested pages that -- 2.7.4
[Qemu-devel] [PATCH v2 3/6] migration: calculate vCPU blocktime on dst side
This patch provides blocktime calculation per vCPU, as a summary and as a overlapped value for all vCPUs. This approach was suggested by Peter Xu, as an improvements of previous approch where QEMU kept tree with faulted page address and cpus bitmask in it. Now QEMU is keeping array with faulted page address as value and vCPU as index. It helps to find proper vCPU at UFFD_COPY time. Also it keeps list for blocktime per vCPU (could be traced with page_fault_addr) Blocktime will not calculated if postcopy_blocktime field of MigrationIncomingState wasn't initialized. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- migration/postcopy-ram.c | 151 ++- migration/trace-events | 5 +- 2 files changed, 154 insertions(+), 2 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 66f1df9..6b01884 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -636,6 +636,148 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, return 0; } +static int get_mem_fault_cpu_index(uint32_t pid) +{ +CPUState *cpu_iter; + +CPU_FOREACH(cpu_iter) { +if (cpu_iter->thread_id == pid) { +trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid); +return cpu_iter->cpu_index; +} +} +trace_get_mem_fault_cpu_index(-1, pid); +return -1; +} + +static uint32_t get_low_time_offset(PostcopyBlocktimeContext *dc) +{ +int64_t start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - +dc->start_time; +return start_time_offset < 1 ? 1 : start_time_offset & UINT32_MAX; +} + +/* + * This function is being called when pagefault occurs. It + * tracks down vCPU blocking time. 
+ * + * @addr: faulted host virtual address + * @ptid: faulted process thread id + * @rb: ramblock appropriate to addr + */ +static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, + RAMBlock *rb) +{ +int cpu, already_received; +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +uint32_t low_time_offset; + +if (!dc || ptid == 0) { +return; +} +cpu = get_mem_fault_cpu_index(ptid); +if (cpu < 0) { +return; +} + +low_time_offset = get_low_time_offset(dc); +if (dc->vcpu_addr[cpu] == 0) { +atomic_inc(>smp_cpus_down); +} + +atomic_xchg(>last_begin, low_time_offset); +atomic_xchg(>page_fault_vcpu_time[cpu], low_time_offset); +atomic_xchg(>vcpu_addr[cpu], addr); + +/* check it here, not at the begining of the function, + * due to, check could accur early than bitmap_set in + * qemu_ufd_copy_ioctl */ +already_received = ramblock_recv_bitmap_test(rb, (void *)addr); +if (already_received) { +atomic_xchg(>vcpu_addr[cpu], 0); +atomic_xchg(>page_fault_vcpu_time[cpu], 0); +atomic_dec(>smp_cpus_down); +} +trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu], +cpu, already_received); +} + +/* + * This function just provide calculated blocktime per cpu and trace it. + * Total blocktime is calculated in mark_postcopy_blocktime_end. + * + * + * Assume we have 3 CPU + * + * S1E1 S1 E1 + * -***xxx***> CPU1 + * + * S2E2 + * xxx---> CPU2 + * + * S3E3 + * xxx---> CPU3 + * + * We have sequence S1,S2,E1,S3,S1,E2,E3,E1 + * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't include CPU3 + * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 - + *it's a part of total blocktime. 
+ * S1 - here is last_begin + * Legend of the picture is following: + * * - means blocktime per vCPU + * x - means overlapped blocktime (total blocktime) + * + * @addr: host virtual address + */ +static void mark_postcopy_blocktime_end(uintptr_t addr) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int i, affected_cpu = 0; +bool vcpu_total_blocktime = false; +uint32_t read_vcpu_time, low_time_offset; + +if (!dc) { +return; +} + +low_time_offset = get_low_time_offset(dc); +/* lookup cpu, to clear it, + * that algorithm looks straighforward, but it's not + * op
[Qemu-devel] [PATCH v2 5/6] migration: add blocktime calculation into migration-test
This patch just requests blocktime calculation, and check it in case when UFFD_FEATURE_THREAD_ID feature is set on the host. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- tests/migration-test.c | 16 1 file changed, 16 insertions(+) diff --git a/tests/migration-test.c b/tests/migration-test.c index 422bf1a..dde7c46 100644 --- a/tests/migration-test.c +++ b/tests/migration-test.c @@ -26,6 +26,7 @@ const unsigned start_address = 1024 * 1024; const unsigned end_address = 100 * 1024 * 1024; bool got_stop; +static bool uffd_feature_thread_id; #if defined(__linux__) #include @@ -55,6 +56,7 @@ static bool ufd_version_check(void) g_test_message("Skipping test: UFFDIO_API failed"); return false; } +uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID; ioctl_mask = (__u64)1 << _UFFDIO_REGISTER | (__u64)1 << _UFFDIO_UNREGISTER; @@ -223,6 +225,16 @@ static uint64_t get_migration_pass(QTestState *who) return result; } +static void read_blocktime(QTestState *who) +{ +QDict *rsp, *rsp_return; + +rsp = wait_command(who, "{ 'execute': 'query-migrate' }"); +rsp_return = qdict_get_qdict(rsp, "return"); +g_assert(qdict_haskey(rsp_return, "postcopy-blocktime")); +QDECREF(rsp); +} + static void wait_for_migration_complete(QTestState *who) { while (true) { @@ -533,6 +545,7 @@ static void test_migrate(void) migrate_set_capability(from, "postcopy-ram", "true"); migrate_set_capability(to, "postcopy-ram", "true"); +migrate_set_capability(to, "postcopy-blocktime", "true"); /* We want to pick a speed slow enough that the test completes * quickly, but that it doesn't complete precopy even on a slow @@ -559,6 +572,9 @@ static void test_migrate(void) wait_for_serial("dest_serial"); wait_for_migration_complete(from); +if (uffd_feature_thread_id) { +read_blocktime(to); +} g_free(uri); test_migrate_end(from, to, 
true); -- 2.7.4
[Qemu-devel] [PATCH v1 6/6] migration: add postcopy total blocktime into query-migrate
Postcopy total blocktime is available on destination side only. But query-migrate was possible only for source. This patch adds ability to call query-migrate on destination. To be able to see postcopy blocktime, need to request postcopy-blocktime capability. The query-migrate command will show following sample result: {"return": "postcopy-vcpu-blocktime": [115, 100], "status": "completed", "postcopy-blocktime": 100 }} postcopy_vcpu_blocktime contains list, where the first item is the first vCPU in QEMU. This patch has a drawback, it combines states of incoming and outgoing migration. Ongoing migration state will overwrite incoming state. Looks like better to separate query-migrate for incoming and outgoing migration or add parameter to indicate type of migration. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- hmp.c| 15 + migration/migration.c| 42 migration/migration.h| 4 migration/postcopy-ram.c | 56 migration/trace-events | 1 + qapi/migration.json | 11 +- 6 files changed, 124 insertions(+), 5 deletions(-) diff --git a/hmp.c b/hmp.c index 016cb5c..4539f77 100644 --- a/hmp.c +++ b/hmp.c @@ -274,6 +274,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->cpu_throttle_percentage); } +if (info->has_postcopy_blocktime) { +monitor_printf(mon, "postcopy blocktime: %u\n", + info->postcopy_blocktime); +} + +if (info->has_postcopy_vcpu_blocktime) { +Visitor *v; +char *str; +v = string_output_visitor_new(false, ); +visit_type_uint32List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_complete(v, ); +monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); +g_free(str); +visit_free(v); +} qapi_free_MigrationInfo(info); qapi_free_MigrationCapabilityStatusList(caps); } diff --git a/migration/migration.c b/migration/migration.c index 0ee9c1f..ae8890e 100644 --- a/migration/migration.c +++ 
b/migration/migration.c @@ -619,14 +619,15 @@ static void populate_disk_info(MigrationInfo *info) } } -MigrationInfo *qmp_query_migrate(Error **errp) +static void fill_source_migration_info(MigrationInfo *info) { -MigrationInfo *info = g_malloc0(sizeof(*info)); MigrationState *s = migrate_get_current(); switch (s->state) { case MIGRATION_STATUS_NONE: /* no migration has happened ever */ +/* do not overwrite destination migration status */ +return; break; case MIGRATION_STATUS_SETUP: info->has_status = true; @@ -677,8 +678,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) break; } info->status = s->state; - -return info; } /** @@ -742,6 +741,41 @@ static bool migrate_caps_check(bool *cap_list, return true; } +static void fill_destination_migration_info(MigrationInfo *info) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); + +switch (mis->state) { +case MIGRATION_STATUS_NONE: +return; +break; +case MIGRATION_STATUS_SETUP: +case MIGRATION_STATUS_CANCELLING: +case MIGRATION_STATUS_CANCELLED: +case MIGRATION_STATUS_ACTIVE: +case MIGRATION_STATUS_POSTCOPY_ACTIVE: +case MIGRATION_STATUS_FAILED: +case MIGRATION_STATUS_COLO: +info->has_status = true; +break; +case MIGRATION_STATUS_COMPLETED: +info->has_status = true; +fill_destination_postcopy_migration_info(info); +break; +} +info->status = mis->state; +} + +MigrationInfo *qmp_query_migrate(Error **errp) +{ +MigrationInfo *info = g_malloc0(sizeof(*info)); + +fill_destination_migration_info(info); +fill_source_migration_info(info); + +return info; +} + void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, Error **errp) { diff --git a/migration/migration.h b/migration/migration.h index 3ead619..a09277e 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -73,6 +73,10 @@ struct MigrationIncomingState { MigrationIncomingState *migration_incoming_get_current(void); void migration_incoming_state_destroy(void); +/* + * Functions to work with blocktime context + */ +void 
fill_destination_postcopy_migration_info(MigrationInfo *info); #define TYPE_MIGRATION "migration" diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 9e51e84..c46225c 100644 --- a/migration/postcopy-ram.c +++ b/migration
[Qemu-devel] [PATCH v1 4/6] migration: postcopy_blocktime documentation
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- docs/devel/migration.rst | 14 ++ 1 file changed, 14 insertions(+) diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 9d1b765..18cd952 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -401,6 +401,20 @@ will now cause the transition from precopy to postcopy. It can be issued immediately after migration is started or any time later on. Issuing it after the end of a migration is harmless. +Blocktime is a postcopy live migration metric, intended to show how +long the vCPU was in state of interruptable sleep due to pagefault. +That metric is calculated both for all vCPUs as overlapped value, and +separately for each vCPU. These values are calculated on destination +side. To enable postcopy blocktime calculation, enter following +command on destination monitor: + +``migrate_set_capability postcopy-blocktime on`` + +Postcopy blocktime can be retrieved by query-migrate qmp command. +postcopy-blocktime value of qmp command will show overlapped blocking +time for all vCPU, postcopy-vcpu-blocktime will show list of blocking +time per vCPU. + .. note:: During the postcopy phase, the bandwidth limits set using ``migrate_set_speed`` is ignored (to avoid delaying requested pages that -- 2.7.4
[Qemu-devel] [PATCH v1 3/6] migration: calculate vCPU blocktime on dst side
This patch provides blocktime calculation per vCPU, as a summary and as a overlapped value for all vCPUs. This approach was suggested by Peter Xu, as an improvements of previous approch where QEMU kept tree with faulted page address and cpus bitmask in it. Now QEMU is keeping array with faulted page address as value and vCPU as index. It helps to find proper vCPU at UFFD_COPY time. Also it keeps list for blocktime per vCPU (could be traced with page_fault_addr) Blocktime will not calculated if postcopy_blocktime field of MigrationIncomingState wasn't initialized. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- migration/postcopy-ram.c | 149 ++- migration/trace-events | 5 +- 2 files changed, 152 insertions(+), 2 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 9144102..9e51e84 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -546,6 +546,148 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, return 0; } +static int get_mem_fault_cpu_index(uint32_t pid) +{ +CPUState *cpu_iter; + +CPU_FOREACH(cpu_iter) { +if (cpu_iter->thread_id == pid) { +trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid); +return cpu_iter->cpu_index; +} +} +trace_get_mem_fault_cpu_index(-1, pid); +return -1; +} + +static uint32_t get_low_time_offset(PostcopyBlocktimeContext *dc) +{ +int64_t start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - +dc->start_time; +return start_time_offset < 1 ? 1 : start_time_offset & UINT32_MAX; +} + +/* + * This function is being called when pagefault occurs. It + * tracks down vCPU blocking time. 
+ * + * @addr: faulted host virtual address + * @ptid: faulted process thread id + * @rb: ramblock appropriate to addr + */ +static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, + RAMBlock *rb) +{ +int cpu, already_received; +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +uint32_t low_time_offset; + +if (!dc || ptid == 0) { +return; +} +cpu = get_mem_fault_cpu_index(ptid); +if (cpu < 0) { +return; +} + +low_time_offset = get_low_time_offset(dc); +if (dc->vcpu_addr[cpu] == 0) { +atomic_inc(>smp_cpus_down); +} + +atomic_xchg(>last_begin, low_time_offset); +atomic_xchg(>page_fault_vcpu_time[cpu], low_time_offset); +atomic_xchg(>vcpu_addr[cpu], addr); + +/* check it here, not at the begining of the function, + * due to, check could accur early than bitmap_set in + * qemu_ufd_copy_ioctl */ +already_received = ramblock_recv_bitmap_test(rb, (void *)addr); +if (already_received) { +atomic_xchg__nocheck(>vcpu_addr[cpu], 0); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], 0); +atomic_dec(>smp_cpus_down); +} +trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu], +cpu, already_received); +} + +/* + * This function just provide calculated blocktime per cpu and trace it. + * Total blocktime is calculated in mark_postcopy_blocktime_end. + * + * + * Assume we have 3 CPU + * + * S1E1 S1 E1 + * -***xxx***> CPU1 + * + * S2E2 + * xxx---> CPU2 + * + * S3E3 + * xxx---> CPU3 + * + * We have sequence S1,S2,E1,S3,S1,E2,E3,E1 + * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't include CPU3 + * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 - + *it's a part of total blocktime. 
+ * S1 - here is last_begin + * Legend of the picture is following: + * * - means blocktime per vCPU + * x - means overlapped blocktime (total blocktime) + * + * @addr: host virtual address + */ +static void mark_postcopy_blocktime_end(uintptr_t addr) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int i, affected_cpu = 0; +bool vcpu_total_blocktime = false; +uint32_t read_vcpu_time, low_time_offset; + +if (!dc) { +return; +} + +low_time_offset = get_low_time_offset(dc); +/* lookup cpu, to clear it, + * that algorithm looks stra
[Qemu-devel] [PATCH v1 1/6] migration: introduce postcopy-blocktime capability
Right now it could be used on destination side to enable vCPU blocktime calculation for postcopy live migration. vCPU blocktime - it's time since vCPU thread was put into interruptible sleep, till memory page was copied and thread awake. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- migration/migration.c | 9 + migration/migration.h | 1 + qapi/migration.json | 6 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index e345d0c..0ee9c1f 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1527,6 +1527,15 @@ bool migrate_zero_blocks(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; } +bool migrate_postcopy_blocktime(void) +{ +MigrationState *s; + +s = migrate_get_current(); + +return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; +} + bool migrate_use_compression(void) { MigrationState *s; diff --git a/migration/migration.h b/migration/migration.h index 08c5d2d..aa7a884 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -225,6 +225,7 @@ int migrate_compress_level(void); int migrate_compress_threads(void); int migrate_decompress_threads(void); bool migrate_use_events(void); +bool migrate_postcopy_blocktime(void); /* Sending on the return path - generic and then for each message type */ void migrate_send_rp_shut(MigrationIncomingState *mis, diff --git a/qapi/migration.json b/qapi/migration.json index 7f465a1..676ef06 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -354,12 +354,16 @@ # # @x-multifd: Use more than one fd for migration (since 2.11) # +# @postcopy-blocktime: Calculate downtime for postcopy live migration +# (since 2.12) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', - 'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] } + 'block', 'return-path', 'pause-before-switchover', 'x-multifd', + 'postcopy-blocktime' ] } ## # @MigrationCapabilityStatus: -- 2.7.4
[Qemu-devel] [PATCH v1 2/6] migration: add postcopy blocktime ctx into MigrationIncomingState
This patch adds request to kernel space for UFFD_FEATURE_THREAD_ID, in case this feature is provided by kernel. PostcopyBlocktimeContext is encapsulated inside postcopy-ram.c, due to it being a postcopy-only feature. Also it defines PostcopyBlocktimeContext's instance live time. Information from PostcopyBlocktimeContext instance will be provided much after postcopy migration end, instance of PostcopyBlocktimeContext will live till QEMU exit, but part of it (vcpu_addr, page_fault_vcpu_time) used only during calculation, will be released when postcopy ended or failed. To enable postcopy blocktime calculation on destination, need to request proper compatibility (Patch for documentation will be at the tail of the patch set). As an example following command enable that capability, assume QEMU was started with -chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock option to control it [root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \ {\"execute\": \"migrate-set-capabilities\" , \"arguments\": { \"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\": true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock Or just with HMP (qemu) migrate_set_capability postcopy-blocktime on Reviewed-by: Dr. 
David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- migration/migration.h| 8 +++ migration/postcopy-ram.c | 61 2 files changed, 69 insertions(+) diff --git a/migration/migration.h b/migration/migration.h index aa7a884..3ead619 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -22,6 +22,8 @@ #include "hw/qdev.h" #include "io/channel.h" +struct PostcopyBlocktimeContext; + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *from_src_file; @@ -61,6 +63,12 @@ struct MigrationIncomingState { /* The coroutine we should enter (back) after failover */ Coroutine *migration_incoming_co; QemuSemaphore colo_incoming_sem; + +/* + * PostcopyBlocktimeContext to keep information for postcopy + * live migration, to calculate vCPU block time + * */ +struct PostcopyBlocktimeContext *blocktime_ctx; }; MigrationIncomingState *migration_incoming_get_current(void); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 032abfb..9144102 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -61,6 +61,54 @@ struct PostcopyDiscardState { #include #include +typedef struct PostcopyBlocktimeContext { +/* time when page fault initiated per vCPU */ +uint32_t *page_fault_vcpu_time; +/* page address per vCPU */ +uintptr_t *vcpu_addr; +uint32_t total_blocktime; +/* blocktime per vCPU */ +uint32_t *vcpu_blocktime; +/* point in time when last page fault was initiated */ +uint32_t last_begin; +/* number of vCPU are suspended */ +int smp_cpus_down; +uint64_t start_time; + +/* + * Handler for exit event, necessary for + * releasing whole blocktime_ctx + */ +Notifier exit_notifier; +} PostcopyBlocktimeContext; + +static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx) +{ +g_free(ctx->page_fault_vcpu_time); +g_free(ctx->vcpu_addr); +g_free(ctx->vcpu_blocktime); 
+g_free(ctx); +} + +static void migration_exit_cb(Notifier *n, void *data) +{ +PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, + exit_notifier); +destroy_blocktime_context(ctx); +} + +static struct PostcopyBlocktimeContext *blocktime_context_new(void) +{ +PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); +ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); +ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); + +ctx->exit_notifier.notify = migration_exit_cb; +ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +qemu_add_exit_notifier(&ctx->exit_notifier); +return ctx; +} /** * receive_ufd_features: check userfault fd features, to request only supported @@ -153,6 +201,19 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) } } +#ifdef UFFD_FEATURE_THREAD_ID +if (migrate_postcopy_blocktime() && mis && +UFFD_FEATURE_THREAD_ID & supported_features) { +/* kernel supports that feature */ +/* don't create blocktime_context if it exists */ +if (!mis->blocktime_ctx) { +mis->blocktime_ctx = blocktime_context_new(); +} + +asked_features |= UFFD_FEATURE_THREAD_ID; +} +#endif + /* * request features, even if asked_features is 0, due to * kernel expects UFFD_API before UFFDIO_REGISTER, per -- 2.7.4
[Qemu-devel] [PATCH v1 5/6] migration: add blocktime calculation into migration-test
This patch just requests blocktime calculation, and check it in case when UFFD_FEATURE_THREAD_ID feature is set on the host. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- tests/migration-test.c | 16 1 file changed, 16 insertions(+) diff --git a/tests/migration-test.c b/tests/migration-test.c index 74f9361..259acfb 100644 --- a/tests/migration-test.c +++ b/tests/migration-test.c @@ -26,6 +26,7 @@ const unsigned start_address = 1024 * 1024; const unsigned end_address = 100 * 1024 * 1024; bool got_stop; +static bool uffd_feature_thread_id; #if defined(__linux__) #include @@ -55,6 +56,7 @@ static bool ufd_version_check(void) g_test_message("Skipping test: UFFDIO_API failed"); return false; } +uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID; ioctl_mask = (__u64)1 << _UFFDIO_REGISTER | (__u64)1 << _UFFDIO_UNREGISTER; @@ -223,6 +225,16 @@ static uint64_t get_migration_pass(QTestState *who) return result; } +static void read_blocktime(QTestState *who) +{ +QDict *rsp, *rsp_return; + +rsp = wait_command(who, "{ 'execute': 'query-migrate' }"); +rsp_return = qdict_get_qdict(rsp, "return"); +g_assert(qdict_haskey(rsp_return, "postcopy-blocktime")); +QDECREF(rsp); +} + static void wait_for_migration_complete(QTestState *who) { while (true) { @@ -522,6 +534,7 @@ static void test_migrate(void) migrate_set_capability(from, "postcopy-ram", "true"); migrate_set_capability(to, "postcopy-ram", "true"); +migrate_set_capability(to, "postcopy-blocktime", "true"); /* We want to pick a speed slow enough that the test completes * quickly, but that it doesn't complete precopy even on a slow @@ -548,6 +561,9 @@ static void test_migrate(void) wait_for_serial("dest_serial"); wait_for_migration_complete(from); +if (uffd_feature_thread_id) { +read_blocktime(to); +} g_free(uri); test_migrate_end(from, to, 
true); -- 2.7.4
[Qemu-devel] [PATCH v1 0/6] postcopy block time calculation + ppc32 build fix
This patch set includes patches which were reverted by commit ee86981bd, due to build problem on 32 powerpc/arm architecture. Also it includes patch to fix build ([PATCH v4] migration: change blocktime type to uint32_t), but that patch was merged into: migration: add postcopy blocktime ctx into MigrationIncomingState migration: calculate vCPU blocktime on dst side migration: add postcopy total blocktime into query-migrate based on commit 12c06d6f967a63515399b9e1f6a40f5ce871a8b7 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging Alexey Perevalov (6): migration: introduce postcopy-blocktime capability migration: add postcopy blocktime ctx into MigrationIncomingState migration: calculate vCPU blocktime on dst side migration: postcopy_blocktime documentation migration: add blocktime calculation into migration-test migration: add postcopy total blocktime into query-migrate docs/devel/migration.rst | 14 +++ hmp.c| 15 +++ migration/migration.c| 51 - migration/migration.h| 13 +++ migration/postcopy-ram.c | 266 ++- migration/trace-events | 6 +- qapi/migration.json | 17 ++- tests/migration-test.c | 16 +++ 8 files changed, 390 insertions(+), 8 deletions(-) -- 2.7.4
Re: [Qemu-devel] [PATCH v4] migration: change blocktime type to uint32_t
On 03/08/2018 03:59 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: Initially int64_t was used, but on PowerPC architecture, clang doesn't have atomic_*_8 function, so it produces link time error. QEMU is working with time as with 64bit value, but by fact 32 bit is enough with CLOCK_REALTIME. In this case blocktime will keep only 1200 hours time interval. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Acked-by: Eric Blake <ebl...@redhat.com> Hi Alexey, So yes, I think that works; can you repost this merged with your full set of block-time code; because we had to revert it, we need to put it back all in again. Do you mean just to add this patch to set of reverted patches, or merge code of this patch into "migration: calculate vCPU blocktime on dst side"? Thanks, Dave --- hmp.c| 4 ++-- migration/postcopy-ram.c | 52 migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/hmp.c b/hmp.c index be091e0..ec90043 100644 --- a/hmp.c +++ b/hmp.c @@ -267,7 +267,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) } if (info->has_postcopy_blocktime) { -monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", +monitor_printf(mon, "postcopy blocktime: %u\n", info->postcopy_blocktime); } @@ -275,7 +275,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) Visitor *v; char *str; v = string_output_visitor_new(false, ); -visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_type_uint32List(v, NULL, >postcopy_vcpu_blocktime, NULL); visit_complete(v, ); monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); g_free(str); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 05475e0..c46225c 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -63,16 +63,17 @@ struct PostcopyDiscardState { typedef struct PostcopyBlocktimeContext { /* time when page fault initiated per vCPU */ -int64_t 
*page_fault_vcpu_time; +uint32_t *page_fault_vcpu_time; /* page address per vCPU */ uintptr_t *vcpu_addr; -int64_t total_blocktime; +uint32_t total_blocktime; /* blocktime per vCPU */ -int64_t *vcpu_blocktime; +uint32_t *vcpu_blocktime; /* point in time when last page fault was initiated */ -int64_t last_begin; +uint32_t last_begin; /* number of vCPU are suspended */ int smp_cpus_down; +uint64_t start_time; /* * Handler for exit event, necessary for @@ -99,22 +100,23 @@ static void migration_exit_cb(Notifier *n, void *data) static struct PostcopyBlocktimeContext *blocktime_context_new(void) { PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); -ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); -ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); ctx->exit_notifier.notify = migration_exit_cb; +ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); qemu_add_exit_notifier(>exit_notifier); return ctx; } -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) { -int64List *list = NULL, *entry = NULL; +uint32List *list = NULL, *entry = NULL; int i; for (i = smp_cpus - 1; i >= 0; i--) { -entry = g_new0(int64List, 1); +entry = g_new0(uint32List, 1); entry->value = ctx->vcpu_blocktime[i]; entry->next = list; list = entry; @@ -145,7 +147,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc); } -static uint64_t get_postcopy_total_blocktime(void) +static uint32_t get_postcopy_total_blocktime(void) { MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *bc = mis->blocktime_ctx; @@ -610,6 +612,13 @@ static int get_mem_fault_cpu_index(uint32_t pid) return -1; } +static uint32_t 
get_low_time_offset(PostcopyBlocktimeContext *dc) +{ +int64_t start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - +dc->start_time; +return start_time_offset < 1 ? 1 : start_time_offset & UINT32_MAX; +} + /* * This function is being called when pagefault occurs. It * tracks down vCPU blocking time.
[Qemu-devel] [PATCH v4] Fix build on ppc platform in migration/postcopy-ram.c
V4->V3 - common helper was introduced and sanity check for probable time jumps (comment from David) V2->V3 - use UINT32_MAX instead of 0xFFFFFFFF (comment from Philippe) - use relative time to avoid milliseconds overflow in uint32 (comment from David) V2->V1 This is a second version: - comment from David about casting David was right, I tried to find it in standard, but it was implicitly described for me, so part of standard: 1. When a value with integer type is converted to another integer type other than _Bool, if the value can be represented by the new type, it is unchanged. 2. Otherwise, if the new type is unsigned, the value is converted by repeatedly adding or subtracting one more than the maximum value that can be represented in the new type until the value is in the range of the new type. Initial message: It was a problem with 64 atomics on ppc in migration/postcopy-ram.c reported by Philippe Mathieu-Daudé <f4...@amsat.org>. Tested in Debian on qemu-system-ppc and in Ubuntu16.04 on i386. This commit is based on commit afd3397a8149d8b645004e459bf2002d78f5e267 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging but with all necessary commit reverted in ee86981bda9ecd40c8daf81b7307b1d2aff68174 Alexey Perevalov (1): migration: change blocktime type to uint32_t hmp.c| 4 ++-- migration/postcopy-ram.c | 52 migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 36 insertions(+), 28 deletions(-) -- 2.7.4
[Qemu-devel] [PATCH v4] migration: change blocktime type to uint32_t
Initially int64_t was used, but on PowerPC architecture, clang doesn't have atomic_*_8 function, so it produces link time error. QEMU is working with time as with 64bit value, but by fact 32 bit is enough with CLOCK_REALTIME. In this case blocktime will keep only 1200 hours time interval. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Acked-by: Eric Blake <ebl...@redhat.com> --- hmp.c| 4 ++-- migration/postcopy-ram.c | 52 migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/hmp.c b/hmp.c index be091e0..ec90043 100644 --- a/hmp.c +++ b/hmp.c @@ -267,7 +267,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) } if (info->has_postcopy_blocktime) { -monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", +monitor_printf(mon, "postcopy blocktime: %u\n", info->postcopy_blocktime); } @@ -275,7 +275,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) Visitor *v; char *str; v = string_output_visitor_new(false, ); -visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_type_uint32List(v, NULL, >postcopy_vcpu_blocktime, NULL); visit_complete(v, ); monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); g_free(str); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 05475e0..c46225c 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -63,16 +63,17 @@ struct PostcopyDiscardState { typedef struct PostcopyBlocktimeContext { /* time when page fault initiated per vCPU */ -int64_t *page_fault_vcpu_time; +uint32_t *page_fault_vcpu_time; /* page address per vCPU */ uintptr_t *vcpu_addr; -int64_t total_blocktime; +uint32_t total_blocktime; /* blocktime per vCPU */ -int64_t *vcpu_blocktime; +uint32_t *vcpu_blocktime; /* point in time when last page fault was initiated */ -int64_t last_begin; +uint32_t last_begin; /* number of vCPU are suspended */ int smp_cpus_down; +uint64_t start_time; /* * Handler for exit event, necessary for 
@@ -99,22 +100,23 @@ static void migration_exit_cb(Notifier *n, void *data) static struct PostcopyBlocktimeContext *blocktime_context_new(void) { PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); -ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); -ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); ctx->exit_notifier.notify = migration_exit_cb; +ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); qemu_add_exit_notifier(>exit_notifier); return ctx; } -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) { -int64List *list = NULL, *entry = NULL; +uint32List *list = NULL, *entry = NULL; int i; for (i = smp_cpus - 1; i >= 0; i--) { -entry = g_new0(int64List, 1); +entry = g_new0(uint32List, 1); entry->value = ctx->vcpu_blocktime[i]; entry->next = list; list = entry; @@ -145,7 +147,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc); } -static uint64_t get_postcopy_total_blocktime(void) +static uint32_t get_postcopy_total_blocktime(void) { MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *bc = mis->blocktime_ctx; @@ -610,6 +612,13 @@ static int get_mem_fault_cpu_index(uint32_t pid) return -1; } +static uint32_t get_low_time_offset(PostcopyBlocktimeContext *dc) +{ +int64_t start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - +dc->start_time; +return start_time_offset < 1 ? 1 : start_time_offset & UINT32_MAX; +} + /* * This function is being called when pagefault occurs. It * tracks down vCPU blocking time. 
@@ -624,7 +633,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, int cpu, already_received; MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *dc = mis->blocktime_ctx; -int64_t now_ms; +uint32_t low_time_offset; if (!dc || ptid == 0) { return; @@ -634,14 +643,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, return; } -now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
[Qemu-devel] [PATCH v3] migration: change blocktime type to uint32_t
Initially int64_t was used, but on PowerPC architecture, clang doesn't have atomic_*_8 function, so it produces link time error. QEMU is working with time as with 64bit value, but by fact 32 bit is enough with CLOCK_REALTIME. In this case blocktime will keep only 1200 hours time interval. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Acked-by: Eric Blake <ebl...@redhat.com> --- hmp.c| 4 ++-- migration/postcopy-ram.c | 48 +++- migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/hmp.c b/hmp.c index c6bab53..3c376b3 100644 --- a/hmp.c +++ b/hmp.c @@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) } if (info->has_postcopy_blocktime) { -monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", +monitor_printf(mon, "postcopy blocktime: %u\n", info->postcopy_blocktime); } @@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) Visitor *v; char *str; v = string_output_visitor_new(false, ); -visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_type_uint32List(v, NULL, >postcopy_vcpu_blocktime, NULL); visit_complete(v, ); monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); g_free(str); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7814da5..6694fd3 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -63,16 +63,17 @@ struct PostcopyDiscardState { typedef struct PostcopyBlocktimeContext { /* time when page fault initiated per vCPU */ -int64_t *page_fault_vcpu_time; +uint32_t *page_fault_vcpu_time; /* page address per vCPU */ uintptr_t *vcpu_addr; -int64_t total_blocktime; +uint32_t total_blocktime; /* blocktime per vCPU */ -int64_t *vcpu_blocktime; +uint32_t *vcpu_blocktime; /* point in time when last page fault was initiated */ -int64_t last_begin; +uint32_t last_begin; /* number of vCPU are suspended */ int smp_cpus_down; +uint64_t start_time; /* * Handler for exit event, necessary 
for @@ -99,22 +100,23 @@ static void migration_exit_cb(Notifier *n, void *data) static struct PostcopyBlocktimeContext *blocktime_context_new(void) { PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); -ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); -ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); ctx->exit_notifier.notify = migration_exit_cb; +ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); qemu_add_exit_notifier(>exit_notifier); return ctx; } -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) { -int64List *list = NULL, *entry = NULL; +uint32List *list = NULL, *entry = NULL; int i; for (i = smp_cpus - 1; i >= 0; i--) { -entry = g_new0(int64List, 1); +entry = g_new0(uint32List, 1); entry->value = ctx->vcpu_blocktime[i]; entry->next = list; list = entry; @@ -145,7 +147,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc); } -static uint64_t get_postcopy_total_blocktime(void) +static uint32_t get_postcopy_total_blocktime(void) { MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *bc = mis->blocktime_ctx; @@ -633,7 +635,8 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, int cpu, already_received; MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *dc = mis->blocktime_ctx; -int64_t now_ms; +int64_t start_time_offset; +uint32_t low_time_offset; if (!dc || ptid == 0) { return; @@ -643,14 +646,15 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, return; } -now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - dc->start_time; 
+low_time_offset = start_time_offset & UINT32_MAX; if (dc->vcpu_addr[cpu] == 0) { atomic_inc(>smp_cpus_down); } -atomic_xchg__nocheck(>last_begin, now_ms); -atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], now_ms); -atomic_xchg__nocheck(>vcpu_addr[cpu], addr); +atomic_xchg(>last_begin, low_time_offset); +atomic_xc
[Qemu-devel] [PATCH v3] Fix build on ppc platform in migration/postcopy-ram.c
V2->V3 - use UINT32_MAX instead of 0xFFFFFFFF (comment from Philippe) - use relative time to avoid milliseconds overflow in uint32 (comment from David) V2->V1 This is a second version: - comment from David about casting David was right, I tried to find it in standard, but it was implicitly described for me, so part of standard: 1. When a value with integer type is converted to another integer type other than _Bool, if the value can be represented by the new type, it is unchanged. 2. Otherwise, if the new type is unsigned, the value is converted by repeatedly adding or subtracting one more than the maximum value that can be represented in the new type until the value is in the range of the new type. Initial message: It was a problem with 64 atomics on ppc in migration/postcopy-ram.c reported by Philippe Mathieu-Daudé <f4...@amsat.org>. Tested in Debian on qemu-system-ppc and in Ubuntu16.04 on i386. This commit is based on commit ee264eb32c14f076c964fc34ee66f6f95cce2080 "Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.12-20180121' into staging" Alexey Perevalov (1): migration: change blocktime type to uint32_t hmp.c| 4 ++-- migration/postcopy-ram.c | 48 +++- migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 33 insertions(+), 27 deletions(-) -- 2.7.4
Re: [Qemu-devel] [PATCH v1] migration: change blocktime type to uint32_t
On 01/25/2018 11:02 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: Initially int64_t was used, but on PowerPC architecture, clang doesn't have atomic_*_8 function, so it produces link time error. QEMU is working with time as with 64bit value, but by fact 32 bit is enough with CLOCK_REALTIME. In this case blocktime will keep only 1200 hours time interval. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- hmp.c| 4 ++-- migration/postcopy-ram.c | 47 ++- migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/hmp.c b/hmp.c index c6bab53..3c376b3 100644 --- a/hmp.c +++ b/hmp.c @@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) } if (info->has_postcopy_blocktime) { -monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", +monitor_printf(mon, "postcopy blocktime: %u\n", info->postcopy_blocktime); } @@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) Visitor *v; char *str; v = string_output_visitor_new(false, ); -visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_type_uint32List(v, NULL, >postcopy_vcpu_blocktime, NULL); visit_complete(v, ); monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); g_free(str); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7814da5..ce91de8 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -63,14 +63,14 @@ struct PostcopyDiscardState { typedef struct PostcopyBlocktimeContext { /* time when page fault initiated per vCPU */ -int64_t *page_fault_vcpu_time; +uint32_t *page_fault_vcpu_time; /* page address per vCPU */ uintptr_t *vcpu_addr; -int64_t total_blocktime; +uint32_t total_blocktime; /* blocktime per vCPU */ -int64_t *vcpu_blocktime; +uint32_t *vcpu_blocktime; /* point in time when last page fault was initiated */ -int64_t last_begin; +uint32_t last_begin; /* number of vCPU are suspended */ int 
smp_cpus_down; @@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data) static struct PostcopyBlocktimeContext *blocktime_context_new(void) { PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); -ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); -ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); ctx->exit_notifier.notify = migration_exit_cb; qemu_add_exit_notifier(&ctx->exit_notifier); return ctx; } -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) { -int64List *list = NULL, *entry = NULL; +uint32List *list = NULL, *entry = NULL; int i; for (i = smp_cpus - 1; i >= 0; i--) { -entry = g_new0(int64List, 1); +entry = g_new0(uint32List, 1); entry->value = ctx->vcpu_blocktime[i]; entry->next = list; list = entry; @@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc); } -static uint64_t get_postcopy_total_blocktime(void) +static uint32_t get_postcopy_total_blocktime(void) { MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *bc = mis->blocktime_ctx; @@ -619,6 +619,16 @@ static int get_mem_fault_cpu_index(uint32_t pid) return -1; } +static uint32_t get_least_significant_part(int64_t value) +{ +unsigned char *t = (unsigned char *)&value; +#if defined(HOST_WORDS_BIGENDIAN) +return t[4] << 24 | t[5] << 16 | t[6] << 8 | t[7] << 0; +#else +return t[0] << 0 | t[1] << 8 | t[2] << 16 | t[3] << 24; +#endif /* HOST_WORDS_BIGENDIAN */ +} This doesn't feel right. Firstly, we're doing a check for the magic value of read_vcpu_time==0 in mark_postcopy_blocktime_end - so we have to be careful not to hit it.
Just masking the bottom 32bits of time means we've got a (rare) chance of hitting that; but we've got a much less rare change of hitting the case where one of the measurements happens after the roll-over of the bottom 32bits. If you stored a time at the start of the postcopy and just subtracted that from 'now' you're probably OK though. Here not so clearly for me. I though we get some "now" and it doesn't matter how, anding or shi
Re: [Qemu-devel] [PATCH v2] migration: change blocktime type to uint32_t
On 01/26/2018 09:14 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: Initially int64_t was used, but on PowerPC architecture, clang doesn't have atomic_*_8 function, so it produces link time error. QEMU is working with time as with 64bit value, but by fact 32 bit is enough with CLOCK_REALTIME. In this case blocktime will keep only 1200 hours time interval. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Acked-by: Eric Blake <ebl...@redhat.com> --- hmp.c| 4 ++-- migration/postcopy-ram.c | 37 - migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/hmp.c b/hmp.c index c6bab53..3c376b3 100644 --- a/hmp.c +++ b/hmp.c @@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) } if (info->has_postcopy_blocktime) { -monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", +monitor_printf(mon, "postcopy blocktime: %u\n", info->postcopy_blocktime); } @@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) Visitor *v; char *str; v = string_output_visitor_new(false, ); -visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_type_uint32List(v, NULL, >postcopy_vcpu_blocktime, NULL); visit_complete(v, ); monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); g_free(str); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7814da5..bd08c24 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -63,14 +63,14 @@ struct PostcopyDiscardState { typedef struct PostcopyBlocktimeContext { /* time when page fault initiated per vCPU */ -int64_t *page_fault_vcpu_time; +uint32_t *page_fault_vcpu_time; /* page address per vCPU */ uintptr_t *vcpu_addr; -int64_t total_blocktime; +uint32_t total_blocktime; /* blocktime per vCPU */ -int64_t *vcpu_blocktime; +uint32_t *vcpu_blocktime; /* point in time when last page fault was initiated */ -int64_t last_begin; +uint32_t last_begin; /* number 
of vCPU are suspended */ int smp_cpus_down; @@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data) static struct PostcopyBlocktimeContext *blocktime_context_new(void) { PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); -ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); -ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); ctx->exit_notifier.notify = migration_exit_cb; qemu_add_exit_notifier(>exit_notifier); return ctx; } -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) { -int64List *list = NULL, *entry = NULL; +uint32List *list = NULL, *entry = NULL; int i; for (i = smp_cpus - 1; i >= 0; i--) { -entry = g_new0(int64List, 1); +entry = g_new0(uint32List, 1); entry->value = ctx->vcpu_blocktime[i]; entry->next = list; list = entry; @@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc); } -static uint64_t get_postcopy_total_blocktime(void) +static uint32_t get_postcopy_total_blocktime(void) { MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *bc = mis->blocktime_ctx; @@ -634,6 +634,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *dc = mis->blocktime_ctx; int64_t now_ms; +uint32_t least_now; if (!dc || ptid == 0) { return; @@ -644,13 +645,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, } now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +least_now = (uint32_t)now_ms; if (dc->vcpu_addr[cpu] == 0) { atomic_inc(>smp_cpus_down); } -atomic_xchg__nocheck(>last_begin, now_ms); 
-atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], now_ms); -atomic_xchg__nocheck(>vcpu_addr[cpu], addr); +atomic_xchg(>last_begin, least_now); +atomic_xchg(>page_fault_vcpu_time[cpu], least_now); +atomic_xchg(>vcpu_addr[cpu], addr); /* check it here, not at the begining of the function, * due to, ch
[Qemu-devel] [PATCH v2] Fix build on ppc platform in migration/postcopy-ram.c
This is a second version: - comment from David about casting David was right, I tried to find it in standard, but it was implicitly described for me, so part of standard: 1. When a value with integer type is converted to another integer type other than _Bool, if the value can be represented by the new type, it is unchanged. 2. Otherwise, if the new type is unsigned, the value is converted by repeatedly adding or subtracting one more than the maximum value that can be represented in the new type until the value is in the range of the new type. Initial message: It was a problem with 64 atomics on ppc in migration/postcopy-ram.c reported by Philippe Mathieu-Daudé <f4...@amsat.org>. Tested in Debian on qemu-system-ppc and in Ubuntu16.04 on i386. This commit is based on commit ee264eb32c14f076c964fc34ee66f6f95cce2080 "Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.12-20180121' into staging" Alexey Perevalov (1): migration: change blocktime type to uint32_t hmp.c| 4 ++-- migration/postcopy-ram.c | 37 - migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 26 insertions(+), 23 deletions(-) -- 2.7.4
Re: [Qemu-devel] [PATCH v2] migration: change blocktime type to uint32_t
On 01/26/2018 07:13 PM, Philippe Mathieu-Daudé wrote: Hi Alexey, On 01/26/2018 01:05 PM, Alexey Perevalov wrote: Initially int64_t was used, but on PowerPC architecture, clang doesn't have atomic_*_8 function, so it produces link time error. QEMU is working with time as with 64bit value, but by fact 32 bit is enough with CLOCK_REALTIME. In this case blocktime will keep only 1200 hours time interval. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Acked-by: Eric Blake <ebl...@redhat.com> --- hmp.c| 4 ++-- migration/postcopy-ram.c | 37 - migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/hmp.c b/hmp.c index c6bab53..3c376b3 100644 --- a/hmp.c +++ b/hmp.c @@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) } if (info->has_postcopy_blocktime) { -monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", +monitor_printf(mon, "postcopy blocktime: %u\n", info->postcopy_blocktime); } @@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) Visitor *v; char *str; v = string_output_visitor_new(false, ); -visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_type_uint32List(v, NULL, >postcopy_vcpu_blocktime, NULL); visit_complete(v, ); monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); g_free(str); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7814da5..bd08c24 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -63,14 +63,14 @@ struct PostcopyDiscardState { typedef struct PostcopyBlocktimeContext { /* time when page fault initiated per vCPU */ -int64_t *page_fault_vcpu_time; +uint32_t *page_fault_vcpu_time; /* page address per vCPU */ uintptr_t *vcpu_addr; -int64_t total_blocktime; +uint32_t total_blocktime; /* blocktime per vCPU */ -int64_t *vcpu_blocktime; +uint32_t *vcpu_blocktime; /* point in time when last page fault was initiated */ -int64_t last_begin; +uint32_t last_begin; /* 
number of vCPU are suspended */ int smp_cpus_down; @@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data) static struct PostcopyBlocktimeContext *blocktime_context_new(void) { PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); -ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); -ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); ctx->exit_notifier.notify = migration_exit_cb; qemu_add_exit_notifier(>exit_notifier); return ctx; } -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) { -int64List *list = NULL, *entry = NULL; +uint32List *list = NULL, *entry = NULL; int i; for (i = smp_cpus - 1; i >= 0; i--) { -entry = g_new0(int64List, 1); +entry = g_new0(uint32List, 1); entry->value = ctx->vcpu_blocktime[i]; entry->next = list; list = entry; @@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc); } -static uint64_t get_postcopy_total_blocktime(void) +static uint32_t get_postcopy_total_blocktime(void) { MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *bc = mis->blocktime_ctx; @@ -634,6 +634,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *dc = mis->blocktime_ctx; int64_t now_ms; +uint32_t least_now; if (!dc || ptid == 0) { return; @@ -644,13 +645,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, } now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +least_now = (uint32_t)now_ms; if (dc->vcpu_addr[cpu] == 0) { atomic_inc(>smp_cpus_down); } -atomic_xchg__nocheck(>last_begin, now_ms); 
-atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], now_ms); -atomic_xchg__nocheck(>vcpu_addr[cpu], addr); +atomic_xchg(>last_begin, least_now); +atomic_xchg(>page_fault_vcpu_time[cpu], least_now); +atomic_xchg(>vcpu_addr[cpu], addr); /* check it here, not at the begining of the function, * due to, ch
[Qemu-devel] [PATCH v2] migration: change blocktime type to uint32_t
Initially int64_t was used, but on PowerPC architecture, clang doesn't have atomic_*_8 function, so it produces link time error. QEMU is working with time as with 64bit value, but by fact 32 bit is enough with CLOCK_REALTIME. In this case blocktime will keep only 1200 hours time interval. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Acked-by: Eric Blake <ebl...@redhat.com> --- hmp.c| 4 ++-- migration/postcopy-ram.c | 37 - migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/hmp.c b/hmp.c index c6bab53..3c376b3 100644 --- a/hmp.c +++ b/hmp.c @@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) } if (info->has_postcopy_blocktime) { -monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", +monitor_printf(mon, "postcopy blocktime: %u\n", info->postcopy_blocktime); } @@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) Visitor *v; char *str; v = string_output_visitor_new(false, ); -visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_type_uint32List(v, NULL, >postcopy_vcpu_blocktime, NULL); visit_complete(v, ); monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); g_free(str); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7814da5..bd08c24 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -63,14 +63,14 @@ struct PostcopyDiscardState { typedef struct PostcopyBlocktimeContext { /* time when page fault initiated per vCPU */ -int64_t *page_fault_vcpu_time; +uint32_t *page_fault_vcpu_time; /* page address per vCPU */ uintptr_t *vcpu_addr; -int64_t total_blocktime; +uint32_t total_blocktime; /* blocktime per vCPU */ -int64_t *vcpu_blocktime; +uint32_t *vcpu_blocktime; /* point in time when last page fault was initiated */ -int64_t last_begin; +uint32_t last_begin; /* number of vCPU are suspended */ int smp_cpus_down; @@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, 
void *data) static struct PostcopyBlocktimeContext *blocktime_context_new(void) { PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); -ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); -ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); ctx->exit_notifier.notify = migration_exit_cb; qemu_add_exit_notifier(>exit_notifier); return ctx; } -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) { -int64List *list = NULL, *entry = NULL; +uint32List *list = NULL, *entry = NULL; int i; for (i = smp_cpus - 1; i >= 0; i--) { -entry = g_new0(int64List, 1); +entry = g_new0(uint32List, 1); entry->value = ctx->vcpu_blocktime[i]; entry->next = list; list = entry; @@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc); } -static uint64_t get_postcopy_total_blocktime(void) +static uint32_t get_postcopy_total_blocktime(void) { MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *bc = mis->blocktime_ctx; @@ -634,6 +634,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *dc = mis->blocktime_ctx; int64_t now_ms; +uint32_t least_now; if (!dc || ptid == 0) { return; @@ -644,13 +645,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, } now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +least_now = (uint32_t)now_ms; if (dc->vcpu_addr[cpu] == 0) { atomic_inc(>smp_cpus_down); } -atomic_xchg__nocheck(>last_begin, now_ms); -atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], now_ms); -atomic_xchg__nocheck(>vcpu_addr[cpu], addr); +atomic_xchg(>last_begin, least_now); 
+atomic_xchg(>page_fault_vcpu_time[cpu], least_now); +atomic_xchg(>vcpu_addr[cpu], addr); /* check it here, not at the begining of the function, * due to, check could accur early than bitmap_set in @@ -699,20 +701,21 @@ static void mark_postcopy_blocktime_end(uintptr_t addr) int i, affected_cpu = 0; int64_t now_ms; bool vcp
[Qemu-devel] [PATCH v1] migration: change blocktime type to uint32_t
Initially int64_t was used, but on PowerPC architecture, clang doesn't have atomic_*_8 function, so it produces link time error. QEMU is working with time as with 64bit value, but by fact 32 bit is enough with CLOCK_REALTIME. In this case blocktime will keep only 1200 hours time interval. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- hmp.c| 4 ++-- migration/postcopy-ram.c | 47 ++- migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/hmp.c b/hmp.c index c6bab53..3c376b3 100644 --- a/hmp.c +++ b/hmp.c @@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) } if (info->has_postcopy_blocktime) { -monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", +monitor_printf(mon, "postcopy blocktime: %u\n", info->postcopy_blocktime); } @@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) Visitor *v; char *str; v = string_output_visitor_new(false, ); -visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_type_uint32List(v, NULL, >postcopy_vcpu_blocktime, NULL); visit_complete(v, ); monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); g_free(str); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7814da5..ce91de8 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -63,14 +63,14 @@ struct PostcopyDiscardState { typedef struct PostcopyBlocktimeContext { /* time when page fault initiated per vCPU */ -int64_t *page_fault_vcpu_time; +uint32_t *page_fault_vcpu_time; /* page address per vCPU */ uintptr_t *vcpu_addr; -int64_t total_blocktime; +uint32_t total_blocktime; /* blocktime per vCPU */ -int64_t *vcpu_blocktime; +uint32_t *vcpu_blocktime; /* point in time when last page fault was initiated */ -int64_t last_begin; +uint32_t last_begin; /* number of vCPU are suspended */ int smp_cpus_down; @@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data) static struct 
PostcopyBlocktimeContext *blocktime_context_new(void) { PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); -ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); -ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); ctx->exit_notifier.notify = migration_exit_cb; qemu_add_exit_notifier(>exit_notifier); return ctx; } -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) { -int64List *list = NULL, *entry = NULL; +uint32List *list = NULL, *entry = NULL; int i; for (i = smp_cpus - 1; i >= 0; i--) { -entry = g_new0(int64List, 1); +entry = g_new0(uint32List, 1); entry->value = ctx->vcpu_blocktime[i]; entry->next = list; list = entry; @@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc); } -static uint64_t get_postcopy_total_blocktime(void) +static uint32_t get_postcopy_total_blocktime(void) { MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *bc = mis->blocktime_ctx; @@ -619,6 +619,16 @@ static int get_mem_fault_cpu_index(uint32_t pid) return -1; } +static uint32_t get_least_significant_part(int64_t value) +{ +unsigned char *t = (unsigned char *) +#if defined(HOST_WORDS_BIGENDIAN) +return t[4] << 24 | t[5] << 16 | t[6] << 8 | t[7] << 0; +#else +return t[0] << 0 | t[1] << 8 | t[2] << 16 | t[3] << 24; +#endif /* HOST_WORDS_BIGENDIAN */ +} + /* * This function is being called when pagefault occurs. It * tracks down vCPU blocking time. 
@@ -634,6 +644,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyBlocktimeContext *dc = mis->blocktime_ctx; int64_t now_ms; +uint32_t least_now; if (!dc || ptid == 0) { return; @@ -644,13 +655,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, } now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +least_now = get_least_significant_part(now_ms); if (dc->vcpu_addr[cpu] == 0) { atomic_inc(>smp_cpus_down); } -atomic_xchg__nocheck(>last_begin, now_ms); -atomic_xc
[Qemu-devel] [PATCH v1] Fix build on ppc platform in migration/postcopy-ram.c
It was a problem with 64 atomics on ppc in migration/postcopy-ram.c reported by Philippe Mathieu-Daudé <f4...@amsat.org>. I didn't check on ppc because the Debian installation inside docker failed, but I have my own Debian on qemu-system-ppc, and the build is still running. It was also tested on Ubuntu 16.04 on i386. This commit is based on commit ee264eb32c14f076c964fc34ee66f6f95cce2080 "Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.12-20180121' into staging" Alexey Perevalov (1): migration: change blocktime type to uint32_t hmp.c| 4 ++-- migration/postcopy-ram.c | 47 ++- migration/trace-events | 4 ++-- qapi/migration.json | 4 ++-- 4 files changed, 36 insertions(+), 23 deletions(-) -- 2.7.4
Re: [Qemu-devel] [PULL 00/27] Migration pull
On 01/22/2018 07:26 PM, Peter Maydell wrote: On 22 January 2018 at 16:25, Alexey Perevalov <a.pereva...@samsung.com> wrote: I want to keep 64bit atomic operations in migration. Sorry, you can't -- some 32 bit CPUs simply do not provide these operations. You need to rework your design to not require this. I would like to ask David, how do you think is it normal to use just one half of now_ms (int64_t), one half of 64 bit value should represent 1200 hours and probably it's enough to keep time difference. thanks -- PMM -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [PULL 00/27] Migration pull
On 01/22/2018 01:03 PM, Peter Maydell wrote: On 20 January 2018 at 23:36, Juan Quintela <quint...@redhat.com> wrote: Peter Maydell <peter.mayd...@linaro.org> wrote: On 19 January 2018 at 16:43, Alexey Perevalov <a.pereva...@samsung.com> wrote: As I remember, I tested the build in QEMU's docker build system, but now I checked it on i386 Ubuntu, and yes, the linker complains about unresolved atomic symbols. Next week, I'll have time to investigate it deeper. This sounds like exactly the problem I pointed out in a previous round of this patchset :-( https://lists.gnu.org/archive/html/qemu-devel/2018-01/msg02103.html Ignoring comments and sending patches anyway makes me grumpy, especially when the result is exactly "fails obscurely on some architectures only"... It compiles for me. F25 i686 gcc. I did change it to use intptr_t instead of uint64_t. So, I don't know what is going on here. Did you change it to not use the 'nocheck' versions of the macros? The code in master uses 'nocheck' which has exactly the effect of masking this bug on i686... So, I can agree that we have to fix anything that doesn't work, but I can't agree that I didn't care about comments; at least I tried to fix the problems you pointed me to. I said the code should probably not use the nocheck macros. The code in master is still using those macros, wrongly, which is why this problem shows only on ppc32 and not all 32-bit hosts. clang doesn't have the atomic_*_8 function set on 32-bit platforms, but gcc has. I also checked on ubuntu server 12.04; looks like there is no "ATOMIC_RELAXED" in either the gcc or clang toolchain, so the following code atomic_xchg__nocheck(, b64); is expanding to (({ asm volatile("" ::: "memory"); (void)0; }), __sync_lock_test_and_set(, b64)); and we don't have the problem in either gcc or clang. Maybe it's not so effective from a performance point of view. I like the glib approach. #if defined(__ATOMIC_SEQ_CST) && !defined(__clang__) there __ATOMIC_SEQ_CST instead of __ATOMIC_RELAXED in QEMU, but it doesn't matter. 
But clang has atomic_*_[1, 2, 4] functions, so it's not reasonable to avoid using clang for all cases. I want to keep 64bit atomic operations in migration. Maybe add into atomic.h additional check for clang and 64bit pointer and in this case use (({ asm volatile("" ::: "memory"); (void)0; }), __sync_lock_test_and_set(, b64)); ? thanks -- PMM -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [PULL 00/27] Migration pull
On 01/19/2018 07:27 PM, Philippe Mathieu-Daudé wrote: On 01/15/2018 01:14 PM, Peter Maydell wrote: On 15 January 2018 at 11:52, Juan Quintela <quint...@redhat.com> wrote: Hi - rebase on top of lastest - fix compilation on 32bit machines - add Peter Xu cleanups Please, apply. The following changes since commit fd06527b80c88c8dde1b35fdc692685b68d2fd93: Merge remote-tracking branch 'remotes/thibault/tags/samuel-thibault' into staging (2018-01-15 10:39:29 +) are available in the Git repository at: git://github.com/juanquintela/qemu.git tags/migration/20180115 for you to fetch changes up to 816306826a45f4d15352e32d157172af3a35899f: migration: remove notify in fd_error (2018-01-15 12:48:13 +0100) migration/next for 20180115 Applied, thanks. We have armel/armhf/powerpc hosts failing since this pull due to commit 3be98be4e9f. Those target are currently tested on Shippable CI, eventually adding an IRC bot we could notice. I know companies using QEMU system in embedded armel/armhf hosts, I don't know about the ppc32 hosts. This is however unlikely the migration features are used there. If 64bit atomic op are required for migration (performance/security) but not on 32bit system, one way to fix it could be to have the migration code being optional... so we can disable it on 32bit hosts. Regards, Phil. Thank you Phil for report, I have a release this week and it's not yet over. As I remember, I tested build in QEMU's docker build system, but now I checked it on i386 Ubuntu, and yes linker says about unresolved atomic symbols. Next week, I'll have a time to investigate it deeper. -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [PULL 09/27] migration: calculate vCPU blocktime on dst side
On 01/16/2018 08:43 PM, Dr. David Alan Gilbert wrote: * Max Reitz (mre...@redhat.com) wrote: On 2018-01-15 12:52, Juan Quintela wrote: From: Alexey Perevalov <a.pereva...@samsung.com> This patch provides blocktime calculation per vCPU, as a summary and as a overlapped value for all vCPUs. This approach was suggested by Peter Xu, as an improvements of previous approch where QEMU kept tree with faulted page address and cpus bitmask in it. Now QEMU is keeping array with faulted page address as value and vCPU as index. It helps to find proper vCPU at UFFD_COPY time. Also it keeps list for blocktime per vCPU (could be traced with page_fault_addr) Blocktime will not calculated if postcopy_blocktime field of MigrationIncomingState wasn't initialized. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Juan Quintela <quint...@redhat.com> --- migration/postcopy-ram.c | 143 ++- migration/trace-events | 5 +- 2 files changed, 146 insertions(+), 2 deletions(-) For me, this breaks compilation with clang -m32: LINKx86_64-softmmu/qemu-system-x86_64 ../migration/postcopy-ram.o: In function `mark_postcopy_blocktime_begin': /home/maxx/projects/qemu/migration/postcopy-ram.c:599: undefined reference to `__atomic_exchange_8' /home/maxx/projects/qemu/migration/postcopy-ram.c:600: undefined reference to `__atomic_exchange_8' /home/maxx/projects/qemu/migration/postcopy-ram.c:609: undefined reference to `__atomic_exchange_8' ../migration/postcopy-ram.o: In function `mark_postcopy_blocktime_end': /home/maxx/projects/qemu/migration/postcopy-ram.c:665: undefined reference to `__atomic_fetch_add_8' /home/maxx/projects/qemu/migration/postcopy-ram.c:686: undefined reference to `__atomic_fetch_add_8' Am I doing something wrong? 
Hmm I also see that with clang on 32bit (gcc is fine); the problem is the postcopy blocktime stuff is doing some 64bit atomics, which you can never be sure 32bit will support. Dave I didn't check clang build, ok, I'll check it. Max -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK -- Best regards, Alexey Perevalov, phone: +7 (495) 797 25 00 ext 3969 e-mail: a.pereva...@samsung.com <mailto:a.pereva...@samsumng.com> Samsung R Institute Rus 12 Dvintsev street, building 1 127018, Moscow, Russian Federation
Re: [Qemu-devel] [PULL 00/14] Migration pull request
On 01/05/2018 12:59 PM, Juan Quintela wrote: Eric Blake <ebl...@redhat.com> wrote: On 01/03/2018 03:38 AM, Juan Quintela wrote: Hi These are the changes for migration that are already reviewed. Please, apply. Alexey Perevalov (6): migration: introduce postcopy-blocktime capability migration: add postcopy blocktime ctx into MigrationIncomingState migration: calculate vCPU blocktime on dst side migration: postcopy_blocktime documentation migration: add blocktime calculation into migration-test migration: add postcopy total blocktime into query-migrate I had unanswered questions about these patches in the v12 series, where I'm not sure if the interface is still quite right. To be fair, I had already integrated the patches before I saw your questions. We're still early enough that we could adjust the interface after the fact depending on how the questions are answered; I think this is the best approach, so far I can see two questions: - do we want to make it conditional? it requires some locking, but I haven't measured it to see how slow/fast it is. - the other was documentation. I would like Alexey to answer. Depending on how slow it is, I can agree to make it non-optional. Ok, I'll provide logs with traces, maybe a gprof result, today or tomorrow. but we're also early enough that it may be smarter to get the interface right before including it in a pull request. I'll leave it to Peter and Juan to sort out whether this means an updated pull request is needed, or to take this as-is. Thanks, Juan. -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [PATCH v12 6/6] migration: add postcopy total blocktime into query-migrate
On 01/03/2018 12:26 AM, Eric Blake wrote: On 10/30/2017 08:16 AM, Alexey Perevalov wrote: Postcopy total blocktime is available on destination side only. But query-migrate was possible only for source. This patch adds ability to call query-migrate on destination. To be able to see postcopy blocktime, need to request postcopy-blocktime capability. Why not display the stats unconditionally when they are available, instead of having to set a capability knob to request them? That knob necessary to avoid regression if this information is not necessary, we decided during so long discussion in previous version of the patch set - it's not necessary always. But if user requested blocktime and host can't calculate it, e.g. due to appropriate feature isn't supported in host kernel, yes, the value will be 0. The query-migrate command will show following sample result: {"return": "postcopy-vcpu-blocktime": [115, 100], "status": "completed", "postcopy-blocktime": 100 }} postcopy_vcpu_blocktime contains list, where the first item is the first vCPU in QEMU. This patch has a drawback, it combines states of incoming and outgoing migration. Ongoing migration state will overwrite incoming state. Looks like better to separate query-migrate for incoming and outgoing migration or add parameter to indicate type of migration. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- +++ b/qapi/migration.json @@ -156,6 +156,13 @@ # @status is 'failed'. Clients should not attempt to parse the # error strings. (Since 2.7) # +# @postcopy-blocktime: total time when all vCPU were blocked during postcopy +# live migration (Since 2.11) 2.12 now. Should this mention the capability knob needed to enable this stat (or else get rid of the capability knob and always expose this when possible)? +# +# @postcopy-vcpu-blocktime: list of the postcopy blocktime per vCPU (Since 2.11) Also 2.12. 
+# + +# # Since: 0.14.0 ## { 'struct': 'MigrationInfo', @@ -167,7 +174,9 @@ '*downtime': 'int', '*setup-time': 'int', '*cpu-throttle-percentage': 'int', - '*error-desc': 'str'} } + '*error-desc': 'str', + '*postcopy-blocktime' : 'int64', + '*postcopy-vcpu-blocktime': ['int64']} } ## # @query-migrate: -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [PATCH v12 1/6] migration: introduce postcopy-blocktime capability
On 01/03/2018 12:20 AM, Eric Blake wrote: On 10/30/2017 08:16 AM, Alexey Perevalov wrote: Right now it could be used on destination side to enable vCPU blocktime calculation for postcopy live migration. vCPU blocktime - it's time since vCPU thread was put into interruptible sleep, till memory page was copied and thread awake. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/migration.c | 9 + migration/migration.h | 1 + qapi/migration.json | 6 +- 3 files changed, 15 insertions(+), 1 deletion(-) Is there any reason this has to be a new capability rather than unconditionally enabled? What are the trade-offs for enabling vs. disabling the capability that warrant it being a knob? Can we do a better job of documenting in which cases the user would want to change the knob from its default value, if we even need it to be a knob? Hello Eric, sorry for late response, it's holiday in whole Russia. The reason we decided to introduce new capability it's performance penalty, memory & cpu usage, in current version it's not so high as in initial, it affects hot path of post-copy live migration. Regarding documentation part, I'll answer in "[PATCH v12 6/6] migration: add postcopy total blocktime into query-migrate" thread. +++ b/qapi/migration.json @@ -352,12 +352,16 @@ # # @x-multifd: Use more than one fd for migration (since 2.11) # +# @postcopy-blocktime: Calculate downtime for postcopy live migration +# (since 2.11) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', - 'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] } + 'block', 'return-path', 'pause-before-switchover', 'x-multifd', + 'postcopy-blocktime' ] } ## # @MigrationCapabilityStatus: -- Best regards, Alexey Perevalov
[Qemu-devel] [PATCH v12 6/6] migration: add postcopy total blocktime into query-migrate
Postcopy total blocktime is available on destination side only. But query-migrate was possible only for source. This patch adds ability to call query-migrate on destination. To be able to see postcopy blocktime, need to request postcopy-blocktime capability. The query-migrate command will show following sample result: {"return": "postcopy-vcpu-blocktime": [115, 100], "status": "completed", "postcopy-blocktime": 100 }} postcopy_vcpu_blocktime contains list, where the first item is the first vCPU in QEMU. This patch has a drawback, it combines states of incoming and outgoing migration. Ongoing migration state will overwrite incoming state. Looks like better to separate query-migrate for incoming and outgoing migration or add parameter to indicate type of migration. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- hmp.c| 15 + migration/migration.c| 42 migration/migration.h| 4 migration/postcopy-ram.c | 56 migration/trace-events | 1 + qapi/migration.json | 11 +- 6 files changed, 124 insertions(+), 5 deletions(-) diff --git a/hmp.c b/hmp.c index 41fcce6..4f42eb8 100644 --- a/hmp.c +++ b/hmp.c @@ -264,6 +264,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->cpu_throttle_percentage); } +if (info->has_postcopy_blocktime) { +monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", + info->postcopy_blocktime); +} + +if (info->has_postcopy_vcpu_blocktime) { +Visitor *v; +char *str; +v = string_output_visitor_new(false, ); +visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_complete(v, ); +monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); +g_free(str); +visit_free(v); +} qapi_free_MigrationInfo(info); qapi_free_MigrationCapabilityStatusList(caps); } diff --git a/migration/migration.c b/migration/migration.c index c5244ae..cd09ba4 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -589,14 +589,15 @@ static void populate_disk_info(MigrationInfo 
*info) } } -MigrationInfo *qmp_query_migrate(Error **errp) +static void fill_source_migration_info(MigrationInfo *info) { -MigrationInfo *info = g_malloc0(sizeof(*info)); MigrationState *s = migrate_get_current(); switch (s->state) { case MIGRATION_STATUS_NONE: /* no migration has happened ever */ +/* do not overwrite destination migration status */ +return; break; case MIGRATION_STATUS_SETUP: info->has_status = true; @@ -647,8 +648,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) break; } info->status = s->state; - -return info; } /** @@ -712,6 +711,41 @@ static bool migrate_caps_check(bool *cap_list, return true; } +static void fill_destination_migration_info(MigrationInfo *info) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); + +switch (mis->state) { +case MIGRATION_STATUS_NONE: +return; +break; +case MIGRATION_STATUS_SETUP: +case MIGRATION_STATUS_CANCELLING: +case MIGRATION_STATUS_CANCELLED: +case MIGRATION_STATUS_ACTIVE: +case MIGRATION_STATUS_POSTCOPY_ACTIVE: +case MIGRATION_STATUS_FAILED: +case MIGRATION_STATUS_COLO: +info->has_status = true; +break; +case MIGRATION_STATUS_COMPLETED: +info->has_status = true; +fill_destination_postcopy_migration_info(info); +break; +} +info->status = mis->state; +} + +MigrationInfo *qmp_query_migrate(Error **errp) +{ +MigrationInfo *info = g_malloc0(sizeof(*info)); + +fill_destination_migration_info(info); +fill_source_migration_info(info); + +return info; +} + void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, Error **errp) { diff --git a/migration/migration.h b/migration/migration.h index fb8d2ef..99f294f 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -71,6 +71,10 @@ struct MigrationIncomingState { MigrationIncomingState *migration_incoming_get_current(void); void migration_incoming_state_destroy(void); +/* + * Functions to work with blocktime context + */ +void fill_destination_postcopy_migration_info(MigrationInfo *info); #define TYPE_MIGRATION "migration" 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 6bf24e9..2823133 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -108,6 +108,55 @@ static struct PostcopyBlocktimeContext *blocktime_context_new(void) re
[Qemu-devel] [PATCH v12 2/6] migration: add postcopy blocktime ctx into MigrationIncomingState
This patch adds a request to kernel space for UFFD_FEATURE_THREAD_ID, in case this feature is provided by the kernel. PostcopyBlocktimeContext is encapsulated inside postcopy-ram.c, since it is a postcopy-only feature. Also it defines PostcopyBlocktimeContext's instance live time. Information from the PostcopyBlocktimeContext instance will be provided much after postcopy migration end; the instance of PostcopyBlocktimeContext will live till QEMU exit, but the part of it (vcpu_addr, page_fault_vcpu_time) used only during calculation will be released when postcopy has ended or failed. To enable postcopy blocktime calculation on the destination, you need to request the proper capability (a patch for documentation will be at the tail of the patch set). As an example, the following command enables that capability; assume QEMU was started with -chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock option to control it [root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \ {\"execute\": \"migrate-set-capabilities\" , \"arguments\": { \"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\": true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock Or just with HMP (qemu) migrate_set_capability postcopy-blocktime on Reviewed-by: Dr. 
David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/migration.h| 8 +++ migration/postcopy-ram.c | 59 2 files changed, 67 insertions(+) diff --git a/migration/migration.h b/migration/migration.h index 5f5e527..fb8d2ef 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -22,6 +22,8 @@ #include "hw/qdev.h" #include "io/channel.h" +struct PostcopyBlocktimeContext; + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *from_src_file; @@ -59,6 +61,12 @@ struct MigrationIncomingState { /* The coroutine we should enter (back) after failover */ Coroutine *migration_incoming_co; QemuSemaphore colo_incoming_sem; + +/* + * PostcopyBlocktimeContext to keep information for postcopy + * live migration, to calculate vCPU block time + * */ +struct PostcopyBlocktimeContext *blocktime_ctx; }; MigrationIncomingState *migration_incoming_get_current(void); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index bec6c2c..c18ec5a 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -61,6 +61,52 @@ struct PostcopyDiscardState { #include #include +typedef struct PostcopyBlocktimeContext { +/* time when page fault initiated per vCPU */ +int64_t *page_fault_vcpu_time; +/* page address per vCPU */ +uint64_t *vcpu_addr; +int64_t total_blocktime; +/* blocktime per vCPU */ +int64_t *vcpu_blocktime; +/* point in time when last page fault was initiated */ +int64_t last_begin; +/* number of vCPU are suspended */ +int smp_cpus_down; + +/* + * Handler for exit event, necessary for + * releasing whole blocktime_ctx + */ +Notifier exit_notifier; +} PostcopyBlocktimeContext; + +static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx) +{ +g_free(ctx->page_fault_vcpu_time); +g_free(ctx->vcpu_addr); +g_free(ctx->vcpu_blocktime); +g_free(ctx); +} + +static void migration_exit_cb(Notifier *n, void *data) +{ +PostcopyBlocktimeContext *ctx = container_of(n, 
PostcopyBlocktimeContext, + exit_notifier); +destroy_blocktime_context(ctx); +} + +static struct PostcopyBlocktimeContext *blocktime_context_new(void) +{ +PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); +ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->vcpu_addr = g_new0(uint64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); + +ctx->exit_notifier.notify = migration_exit_cb; +qemu_add_exit_notifier(>exit_notifier); +return ctx; +} /** * receive_ufd_features: check userfault fd features, to request only supported @@ -153,6 +199,19 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) } } +#ifdef UFFD_FEATURE_THREAD_ID +if (migrate_postcopy_blocktime() && mis && +UFFD_FEATURE_THREAD_ID & supported_features) { +/* kernel supports that feature */ +/* don't create blocktime_context if it exists */ +if (!mis->blocktime_ctx) { +mis->blocktime_ctx = blocktime_context_new(); +} + +asked_features |= UFFD_FEATURE_THREAD_ID; +} +#endif + /* * request features, even if asked_features is 0, due to * kernel expects UFFD_API before UFFDIO_REGISTER, per -- 2.7.4
[Qemu-devel] [PATCH v12 1/6] migration: introduce postcopy-blocktime capability
Right now it could be used on destination side to enable vCPU blocktime calculation for postcopy live migration. vCPU blocktime - it's time since vCPU thread was put into interruptible sleep, till memory page was copied and thread awake. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/migration.c | 9 + migration/migration.h | 1 + qapi/migration.json | 6 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 62761d5..c5244ae 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1492,6 +1492,15 @@ bool migrate_zero_blocks(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; } +bool migrate_postcopy_blocktime(void) +{ +MigrationState *s; + +s = migrate_get_current(); + +return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; +} + bool migrate_use_compression(void) { MigrationState *s; diff --git a/migration/migration.h b/migration/migration.h index 8ccdd7a..5f5e527 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -202,6 +202,7 @@ int migrate_compress_level(void); int migrate_compress_threads(void); int migrate_decompress_threads(void); bool migrate_use_events(void); +bool migrate_postcopy_blocktime(void); /* Sending on the return path - generic and then for each message type */ void migrate_send_rp_shut(MigrationIncomingState *mis, diff --git a/qapi/migration.json b/qapi/migration.json index 6ae866e..c20caf4 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -352,12 +352,16 @@ # # @x-multifd: Use more than one fd for migration (since 2.11) # +# @postcopy-blocktime: Calculate downtime for postcopy live migration +# (since 2.11) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', - 'block', 'return-path', 
'pause-before-switchover', 'x-multifd' ] } + 'block', 'return-path', 'pause-before-switchover', 'x-multifd', + 'postcopy-blocktime' ] } ## # @MigrationCapabilityStatus: -- 2.7.4
[Qemu-devel] [PATCH v12 3/6] migration: calculate vCPU blocktime on dst side
This patch provides blocktime calculation per vCPU, as a summary and as a overlapped value for all vCPUs. This approach was suggested by Peter Xu, as an improvements of previous approch where QEMU kept tree with faulted page address and cpus bitmask in it. Now QEMU is keeping array with faulted page address as value and vCPU as index. It helps to find proper vCPU at UFFD_COPY time. Also it keeps list for blocktime per vCPU (could be traced with page_fault_addr) Blocktime will not calculated if postcopy_blocktime field of MigrationIncomingState wasn't initialized. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 143 ++- migration/trace-events | 5 +- 2 files changed, 146 insertions(+), 2 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index c18ec5a..6bf24e9 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -553,6 +553,142 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, return 0; } +static int get_mem_fault_cpu_index(uint32_t pid) +{ +CPUState *cpu_iter; + +CPU_FOREACH(cpu_iter) { +if (cpu_iter->thread_id == pid) { +trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid); +return cpu_iter->cpu_index; +} +} +trace_get_mem_fault_cpu_index(-1, pid); +return -1; +} + +/* + * This function is being called when pagefault occurs. It + * tracks down vCPU blocking time. 
+ * + * @addr: faulted host virtual address + * @ptid: faulted process thread id + * @rb: ramblock appropriate to addr + */ +static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid, + RAMBlock *rb) +{ +int cpu, already_received; +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int64_t now_ms; + +if (!dc || ptid == 0) { +return; +} +cpu = get_mem_fault_cpu_index(ptid); +if (cpu < 0) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +if (dc->vcpu_addr[cpu] == 0) { +atomic_inc(>smp_cpus_down); +} + +atomic_xchg__nocheck(>last_begin, now_ms); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], now_ms); +atomic_xchg__nocheck(>vcpu_addr[cpu], addr); + +/* check it here, not at the begining of the function, + * due to, check could accur early than bitmap_set in + * qemu_ufd_copy_ioctl */ +already_received = ramblock_recv_bitmap_test(rb, (void *)addr); +if (already_received) { +atomic_xchg__nocheck(>vcpu_addr[cpu], 0); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], 0); +atomic_dec(>smp_cpus_down); +} +trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu], +cpu, already_received); +} + +/* + * This function just provide calculated blocktime per cpu and trace it. + * Total blocktime is calculated in mark_postcopy_blocktime_end. + * + * + * Assume we have 3 CPU + * + * S1E1 S1 E1 + * -***xxx***> CPU1 + * + * S2E2 + * xxx---> CPU2 + * + * S3E3 + * xxx---> CPU3 + * + * We have sequence S1,S2,E1,S3,S1,E2,E3,E1 + * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't include CPU3 + * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 - + *it's a part of total blocktime. 
+ * S1 - here is last_begin + * Legend of the picture is following: + * * - means blocktime per vCPU + * x - means overlapped blocktime (total blocktime) + * + * @addr: host virtual address + */ +static void mark_postcopy_blocktime_end(uint64_t addr) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int i, affected_cpu = 0; +int64_t now_ms; +bool vcpu_total_blocktime = false; +int64_t read_vcpu_time; + +if (!dc) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + +/* lookup cpu, to clear it, + * that algorithm looks straighforward, but it's not + * optimal, more optimal algorithm is keeping tree or hash + * where key is address value is a list of */ +for (i = 0; i < smp_cpus; i++) { +uint64_t vcpu_blocktime = 0; + +read_vcpu_time = atomic_fetch_add(>page_fault_vcpu_time[i], 0); +if (atomic_fetch_add(>vcpu_addr[i], 0) != addr || +read_vcpu_time == 0) { +continue; +} +atomic_xchg__nocheck
[Qemu-devel] [PATCH v12 0/6] calculate blocktime for postcopy live migration
This is the 12th version. The rationale for that idea is as follows: a vCPU could suspend during postcopy live migration until the faulted page is copied into the kernel. Downtime on the source side is a value — the time interval from when the source turns the vCPU off until the destination starts running the vCPU. That value was a proper value for precopy migration: it really shows the amount of time when the vCPU is down. But not for postcopy migration, because several vCPU threads could suspend after the vCPU was started. That is important to estimate packet drop for SDN software. (V11 -> V12) - don't read vcpu_times twice in mark_postcopy_blocktime_end (comment from David) - migration-test doesn't touch got_stop due to multiple tests, and some code changes due to latest migration-test refactoring. (V10 -> V11) - rebase - update documentation (comment from David) - postcopy_notifier was removed from PostcopyBlocktimeContext (comment from David) - fix "since 2.10" for postcopy-vcpu-blocktime (comment from Eric) - fix order in mark_postcopy_blocktime_begin/end (comment from David), but I think it still has a slim race condition - remove error_report from fill_destination_postcopy_migration_info (comment from David) (V9 -> V10) - rebase - patch "update kernel header for UFFD_FEATURE_*" has changed, and was generated by scripts/update-linux-headers.sh as David suggested. (V8 -> V9) - rebase - traces (V7 -> V8) - just one comma in "migration: fix hardcoded function name in error report" It was really missed, but fixed in a further patch. (V6 -> V7) - copied bitmap was placed into RAMBlock as another migration related bitmaps. - Ordering of mark_postcopy_blocktime_end call and ordering of checking copied bitmap were changed. - linewrap style defects - new patch "postcopy_place_page factoring out" - postcopy_ram_supported_by_host accepts MigrationIncomingState in qmp_migrate_set_capabilities - minor fixes of documentation. and huge description of get_postcopy_total_blocktime was moved. David's comment. 
(V5 -> V6) - blocktime was added into hmp command. Comment from David. - bitmap for copied pages was added as well as check in *_begin/_end functions. Patch uses just introduced RAMBLOCK_FOREACH. Comment from David. - description of receive_ufd_features/request_ufd_features. Comment from David. - commit message headers/@since references were modified. Comment from Eric. - also typos in documentation. Comment from Eric. - style and description of field in MigrationInfo. Comment from Eric. - ufd_check_and_apply (former ufd_version_check) is calling twice, so my previous patch contained double allocation of blocktime context and as a result memory leak. In this patch series it was fixed. (V4 -> V5) - fill_destination_postcopy_migration_info empty stub was missed for none linux build (V3 -> V4) - get rid of Downtime as a name for vCPU waiting time during postcopy migration - PostcopyBlocktimeContext renamed (it was just BlocktimeContext) - atomic operations are used for dealing with fields of PostcopyBlocktimeContext affected in both threads. - hardcoded function names in error_report were replaced to %s and __line__ - this patch set includes postcopy-downtime capability, but it used on destination, coupled with not possibility to return calculated downtime back to source to show it in query-migrate, it looks like a big trade off - UFFD_API have to be sent notwithstanding need or not to ask kernel for a feature, due to kernel expects it in any case (see patch comment) - postcopy_downtime included into query-migrate output - also this patch set includes trivial fix migration: fix hardcoded function name in error report maybe that is a candidate for qemu-trivial mailing list, but I already sent "migration: Fixed code style" and it was unclaimed. (V2 -> V3) - Downtime calculation approach was changed, thanks to Peter Xu - Due to previous point no more need to keep GTree as well as bitmap of cpus. 
So glib changes aren't included in this patch set, it could be resent in another patch set, if it will be a good reason for it. - No procfs traces in this patchset, if somebody wants it, you could get it from patchwork site to track down page fault initiators. - UFFD_FEATURE_THREAD_ID is requesting only when kernel supports it - It doesn't send back the downtime, just trace it Patch set is based on commit 3be480ebb8fdcc99f0a4fcbbf36ec5642a16a10b and Juan Quintela's series "tests: Add migration compress threads tests" Alexey Perevalov (6): migration: introduce postcopy-blocktime capability migration: add postcopy blocktime ctx into MigrationIncomingState migration: calculate vCPU blocktime on dst side migration: postcopy_blocktime documentation migration: add blocktime calculation int
[Qemu-devel] [PATCH v12 4/6] migration: postcopy_blocktime documentation
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- docs/devel/migration.txt | 13 + 1 file changed, 13 insertions(+) diff --git a/docs/devel/migration.txt b/docs/devel/migration.txt index 4030703..cebfe7a 100644 --- a/docs/devel/migration.txt +++ b/docs/devel/migration.txt @@ -402,6 +402,19 @@ will now cause the transition from precopy to postcopy. It can be issued immediately after migration is started or any time later on. Issuing it after the end of a migration is harmless. +Blocktime is a postcopy live migration metric, intended to show +how long the vCPU was in state of interruptable sleep due to pagefault. +That metric is calculated both for all vCPUs as overlapped value, and +separately for each vCPU. These values are calculated on destination side. +To enable postcopy blocktime calculation, enter following command on destination +monitor: + +migrate_set_capability postcopy-blocktime on + +Postcopy blocktime can be retrieved by query-migrate qmp command. +postcopy-blocktime value of qmp command will show overlapped blocking time for +all vCPU, postcopy-vcpu-blocktime will show list of blocking time per vCPU. + Note: During the postcopy phase, the bandwidth limits set using migrate_set_speed is ignored (to avoid delaying requested pages that the destination is waiting for). -- 2.7.4
[Qemu-devel] [PATCH v12 5/6] migration: add blocktime calculation into migration-test
This patch just requests blocktime calculation, and check it in case when UFFD_FEATURE_THREAD_ID feature is set on the host. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- tests/migration-test.c | 16 1 file changed, 16 insertions(+) diff --git a/tests/migration-test.c b/tests/migration-test.c index db30b38..3b4d88a 100644 --- a/tests/migration-test.c +++ b/tests/migration-test.c @@ -25,6 +25,7 @@ const unsigned start_address = 1024 * 1024; const unsigned end_address = 100 * 1024 * 1024; bool got_stop; +static bool uffd_feature_thread_id; #if defined(__linux__) #include @@ -54,6 +55,7 @@ static bool ufd_version_check(void) g_test_message("Skipping test: UFFDIO_API failed"); return false; } +uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID; ioctl_mask = (__u64)1 << _UFFDIO_REGISTER | (__u64)1 << _UFFDIO_UNREGISTER; @@ -266,6 +268,16 @@ static uint64_t get_migration_pass(QTestState *who) return result; } +static void read_blocktime(QTestState *who) +{ +QDict *rsp, *rsp_return; + +rsp = wait_command(who, "{ 'execute': 'query-migrate' }"); +rsp_return = qdict_get_qdict(rsp, "return"); +g_assert(qdict_haskey(rsp_return, "postcopy-blocktime")); +QDECREF(rsp); +} + static void wait_for_migration_complete(QTestState *who) { QDict *rsp, *rsp_return; @@ -540,6 +552,7 @@ static void test_postcopy(void) migrate_set_capability(from, "postcopy-ram", "true"); migrate_set_capability(to, "postcopy-ram", "true"); +migrate_set_capability(to, "postcopy-blocktime", "true"); /* We want to pick a speed slow enough that the test completes * quickly, but that it doesn't complete precopy even on a slow @@ -568,6 +581,9 @@ static void test_postcopy(void) wait_for_serial("dest_serial"); wait_for_migration_complete(from); +if (uffd_feature_thread_id) { +read_blocktime(to); +} g_free(uri); test_migrate_end(from, to); -- 2.7.4
Re: [Qemu-devel] [PATCH v11 3/6] migration: calculate vCPU blocktime on dst side
On 10/18/2017 09:59 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: This patch provides blocktime calculation per vCPU, as a summary and as a overlapped value for all vCPUs. This approach was suggested by Peter Xu, as an improvements of previous approch where QEMU kept tree with faulted page address and cpus bitmask in it. Now QEMU is keeping array with faulted page address as value and vCPU as index. It helps to find proper vCPU at UFFD_COPY time. Also it keeps list for blocktime per vCPU (could be traced with page_fault_addr) Blocktime will not calculated if postcopy_blocktime field of MigrationIncomingState wasn't initialized. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 142 ++- migration/trace-events | 5 +- 2 files changed, 145 insertions(+), 2 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index c18ec5a..2e10870 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -553,6 +553,141 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, return 0; } +static int get_mem_fault_cpu_index(uint32_t pid) +{ +CPUState *cpu_iter; + +CPU_FOREACH(cpu_iter) { +if (cpu_iter->thread_id == pid) { +trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid); +return cpu_iter->cpu_index; +} +} +trace_get_mem_fault_cpu_index(-1, pid); +return -1; +} + +/* + * This function is being called when pagefault occurs. It + * tracks down vCPU blocking time. 
+ * + * @addr: faulted host virtual address + * @ptid: faulted process thread id + * @rb: ramblock appropriate to addr + */ +static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid, + RAMBlock *rb) +{ +int cpu, already_received; +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int64_t now_ms; + +if (!dc || ptid == 0) { +return; +} +cpu = get_mem_fault_cpu_index(ptid); +if (cpu < 0) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +if (dc->vcpu_addr[cpu] == 0) { +atomic_inc(>smp_cpus_down); +} + +atomic_xchg__nocheck(>last_begin, now_ms); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], now_ms); +atomic_xchg__nocheck(>vcpu_addr[cpu], addr); + +/* check it here, not at the begining of the function, + * due to, check could accur early than bitmap_set in + * qemu_ufd_copy_ioctl */ +already_received = ramblock_recv_bitmap_test(rb, (void *)addr); +if (already_received) { +atomic_xchg__nocheck(>vcpu_addr[cpu], 0); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], 0); +atomic_sub(>smp_cpus_down, 1); Minor; but you could use atomic_dec to go with the atomic_inc +} +trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu], +cpu, already_received); +} + +/* + * This function just provide calculated blocktime per cpu and trace it. + * Total blocktime is calculated in mark_postcopy_blocktime_end. + * + * + * Assume we have 3 CPU + * + * S1E1 S1 E1 + * -***xxx***> CPU1 + * + * S2E2 + * xxx---> CPU2 + * + * S3E3 + * xxx---> CPU3 + * + * We have sequence S1,S2,E1,S3,S1,E2,E3,E1 + * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't include CPU3 + * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 - + *it's a part of total blocktime. 
+ * S1 - here is last_begin + * Legend of the picture is following: + * * - means blocktime per vCPU + * x - means overlapped blocktime (total blocktime) + * + * @addr: host virtual address + */ +static void mark_postcopy_blocktime_end(uint64_t addr) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int i, affected_cpu = 0; +int64_t now_ms; +bool vcpu_total_blocktime = false; + +if (!dc) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + +/* lookup cpu, to clear it, + * that algorithm looks straighforward, but it's not + * optimal, more optimal algorithm is keeping tree or hash + * where key is address value is a list of */ +for (i = 0; i < smp_cpus; i++) { +uint64_t vcpu_blocktime = 0; + +if (atomic_fetch_add(>vcpu_addr[i], 0) != addr
Re: [Qemu-devel] [RFC v2 00/32] postcopy+vhost-user/shared ram
Hello Maxime On 09/01/2017 04:42 PM, Maxime Coquelin wrote: Hello Alexey, On 09/01/2017 03:34 PM, Alexey Perevalov wrote: Hello David, You wrote in a previous version: We've had a postcopy migrate work now, with a few hacks we're still cleaning up, both on vhost-user-bridge and dpdk; so I'll get this updated and reposted. I want to know more about the DPDK work; do you know whether somebody is assigned to that task? I did the DPDK (rough) prototype, you may find it here: https://gitlab.com/mcoquelin/dpdk-next-virtio/commits/postcopy_proto_v1 I found it is for a previous version of the patch set. Do you have any updates? Cheers, Maxime -- Best regards, Alexey Perevalov
[Qemu-devel] [PATCH v11 6/6] migration: add postcopy total blocktime into query-migrate
Postcopy total blocktime is available on destination side only. But query-migrate was possible only for source. This patch adds ability to call query-migrate on destination. To be able to see postcopy blocktime, need to request postcopy-blocktime capability. The query-migrate command will show following sample result: {"return": "postcopy-vcpu-blocktime": [115, 100], "status": "completed", "postcopy-blocktime": 100 }} postcopy_vcpu_blocktime contains list, where the first item is the first vCPU in QEMU. This patch has a drawback, it combines states of incoming and outgoing migration. Ongoing migration state will overwrite incoming state. Looks like better to separate query-migrate for incoming and outgoing migration or add parameter to indicate type of migration. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- hmp.c| 15 + migration/migration.c| 42 migration/migration.h| 4 migration/postcopy-ram.c | 56 migration/trace-events | 1 + qapi/migration.json | 11 +- 6 files changed, 124 insertions(+), 5 deletions(-) diff --git a/hmp.c b/hmp.c index ace729d..1939c02 100644 --- a/hmp.c +++ b/hmp.c @@ -264,6 +264,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->cpu_throttle_percentage); } +if (info->has_postcopy_blocktime) { +monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", + info->postcopy_blocktime); +} + +if (info->has_postcopy_vcpu_blocktime) { +Visitor *v; +char *str; +v = string_output_visitor_new(false, ); +visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_complete(v, ); +monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); +g_free(str); +visit_free(v); +} qapi_free_MigrationInfo(info); qapi_free_MigrationCapabilityStatusList(caps); } diff --git a/migration/migration.c b/migration/migration.c index 713f070..91fe885 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -584,14 +584,15 @@ static void populate_disk_info(MigrationInfo 
*info) } } -MigrationInfo *qmp_query_migrate(Error **errp) +static void fill_source_migration_info(MigrationInfo *info) { -MigrationInfo *info = g_malloc0(sizeof(*info)); MigrationState *s = migrate_get_current(); switch (s->state) { case MIGRATION_STATUS_NONE: /* no migration has happened ever */ +/* do not overwrite destination migration status */ +return; break; case MIGRATION_STATUS_SETUP: info->has_status = true; @@ -640,8 +641,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) break; } info->status = s->state; - -return info; } /** @@ -705,6 +704,41 @@ static bool migrate_caps_check(bool *cap_list, return true; } +static void fill_destination_migration_info(MigrationInfo *info) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); + +switch (mis->state) { +case MIGRATION_STATUS_NONE: +return; +break; +case MIGRATION_STATUS_SETUP: +case MIGRATION_STATUS_CANCELLING: +case MIGRATION_STATUS_CANCELLED: +case MIGRATION_STATUS_ACTIVE: +case MIGRATION_STATUS_POSTCOPY_ACTIVE: +case MIGRATION_STATUS_FAILED: +case MIGRATION_STATUS_COLO: +info->has_status = true; +break; +case MIGRATION_STATUS_COMPLETED: +info->has_status = true; +fill_destination_postcopy_migration_info(info); +break; +} +info->status = mis->state; +} + +MigrationInfo *qmp_query_migrate(Error **errp) +{ +MigrationInfo *info = g_malloc0(sizeof(*info)); + +fill_destination_migration_info(info); +fill_source_migration_info(info); + +return info; +} + void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, Error **errp) { diff --git a/migration/migration.h b/migration/migration.h index 2bae992..cb68768 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -71,6 +71,10 @@ struct MigrationIncomingState { MigrationIncomingState *migration_incoming_get_current(void); void migration_incoming_state_destroy(void); +/* + * Functions to work with blocktime context + */ +void fill_destination_postcopy_migration_info(MigrationInfo *info); #define TYPE_MIGRATION "migration" 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 2e10870..a203bae 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -108,6 +108,55 @@ static struct PostcopyBlocktimeContext *blocktime_context_new(void) re
[Qemu-devel] [PATCH v11 5/6] migration: add blocktime calculation into postcopy-test
This patch just requests blocktime calculation, and check it in case when UFFD_FEATURE_THREAD_ID feature is set on the host. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- tests/postcopy-test.c | 63 +++ 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c index 8142f2a..4231cce 100644 --- a/tests/postcopy-test.c +++ b/tests/postcopy-test.c @@ -24,7 +24,8 @@ const unsigned start_address = 1024 * 1024; const unsigned end_address = 100 * 1024 * 1024; -bool got_stop; +static bool got_stop; +static bool uffd_feature_thread_id; #if defined(__linux__) #include @@ -54,6 +55,7 @@ static bool ufd_version_check(void) g_test_message("Skipping test: UFFDIO_API failed"); return false; } +uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID; ioctl_mask = (__u64)1 << _UFFDIO_REGISTER | (__u64)1 << _UFFDIO_UNREGISTER; @@ -265,22 +267,48 @@ static uint64_t get_migration_pass(void) return result; } -static void wait_for_migration_complete(void) +static bool get_src_status(void) { QDict *rsp, *rsp_return; +const char *status; +bool result; + +rsp = return_or_event(qmp("{ 'execute': 'query-migrate' }")); +rsp_return = qdict_get_qdict(rsp, "return"); +status = qdict_get_str(rsp_return, "status"); +g_assert_cmpstr(status, !=, "failed"); +result = strcmp(status, "completed") == 0; +QDECREF(rsp); +return result; +} + +static void read_blocktime(void) +{ +QDict *rsp, *rsp_return; + +rsp = return_or_event(qmp("{ 'execute': 'query-migrate' }")); +rsp_return = qdict_get_qdict(rsp, "return"); +g_assert(qdict_haskey(rsp_return, "postcopy-blocktime")); +QDECREF(rsp); +} + +static void wait_for_migration_complete(QTestState *from, QTestState *to) +{ bool completed; do { -const char *status; - -rsp = return_or_event(qmp("{ 'execute': 'query-migrate' }")); -rsp_return = qdict_get_qdict(rsp, "return"); -status = qdict_get_str(rsp_return, "status"); -completed = strcmp(status, "completed") == 0; 
-g_assert_cmpstr(status, !=, "failed"); -QDECREF(rsp); + +/* test src state */ +global_qtest = from; +completed = get_src_status(); + usleep(1000 * 100); } while (!completed); + +if (uffd_feature_thread_id) { +global_qtest = to; +read_blocktime(); +} } static void wait_for_migration_pass(void) @@ -364,8 +392,6 @@ static void test_migrate(void) char *bootpath = g_strdup_printf("%s/bootsect", tmpfs); const char *arch = qtest_get_arch(); -got_stop = false; - if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { init_bootfile_x86(bootpath); cmd_src = g_strdup_printf("-machine accel=kvm:tcg -m 150M" @@ -425,6 +451,15 @@ static void test_migrate(void) g_assert(qdict_haskey(rsp, "return")); QDECREF(rsp); +global_qtest = to; +rsp = qmp("{ 'execute': 'migrate-set-capabilities'," + "'arguments': { " + "'capabilities': [ {" + "'capability': 'postcopy-blocktime'," + "'state': true } ] } }"); +g_assert(qdict_haskey(rsp, "return")); +QDECREF(rsp); + /* We want to pick a speed slow enough that the test completes * quickly, but that it doesn't complete precopy even on a slow * machine, so also set the downtime. @@ -441,7 +476,6 @@ static void test_migrate(void) g_assert(qdict_haskey(rsp, "return")); QDECREF(rsp); - /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); @@ -467,8 +501,7 @@ static void test_migrate(void) qmp_eventwait("RESUME"); wait_for_serial("dest_serial"); -global_qtest = from; -wait_for_migration_complete(); +wait_for_migration_complete(from, to); qtest_quit(from); -- 2.7.4
[Qemu-devel] [PATCH v11 3/6] migration: calculate vCPU blocktime on dst side
This patch provides blocktime calculation per vCPU, as a summary and as a overlapped value for all vCPUs. This approach was suggested by Peter Xu, as an improvements of previous approch where QEMU kept tree with faulted page address and cpus bitmask in it. Now QEMU is keeping array with faulted page address as value and vCPU as index. It helps to find proper vCPU at UFFD_COPY time. Also it keeps list for blocktime per vCPU (could be traced with page_fault_addr) Blocktime will not calculated if postcopy_blocktime field of MigrationIncomingState wasn't initialized. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 142 ++- migration/trace-events | 5 +- 2 files changed, 145 insertions(+), 2 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index c18ec5a..2e10870 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -553,6 +553,141 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, return 0; } +static int get_mem_fault_cpu_index(uint32_t pid) +{ +CPUState *cpu_iter; + +CPU_FOREACH(cpu_iter) { +if (cpu_iter->thread_id == pid) { +trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid); +return cpu_iter->cpu_index; +} +} +trace_get_mem_fault_cpu_index(-1, pid); +return -1; +} + +/* + * This function is being called when pagefault occurs. It + * tracks down vCPU blocking time. 
+ * + * @addr: faulted host virtual address + * @ptid: faulted process thread id + * @rb: ramblock appropriate to addr + */ +static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid, + RAMBlock *rb) +{ +int cpu, already_received; +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int64_t now_ms; + +if (!dc || ptid == 0) { +return; +} +cpu = get_mem_fault_cpu_index(ptid); +if (cpu < 0) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +if (dc->vcpu_addr[cpu] == 0) { +atomic_inc(>smp_cpus_down); +} + +atomic_xchg__nocheck(>last_begin, now_ms); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], now_ms); +atomic_xchg__nocheck(>vcpu_addr[cpu], addr); + +/* check it here, not at the begining of the function, + * due to, check could accur early than bitmap_set in + * qemu_ufd_copy_ioctl */ +already_received = ramblock_recv_bitmap_test(rb, (void *)addr); +if (already_received) { +atomic_xchg__nocheck(>vcpu_addr[cpu], 0); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], 0); +atomic_sub(>smp_cpus_down, 1); +} +trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu], +cpu, already_received); +} + +/* + * This function just provide calculated blocktime per cpu and trace it. + * Total blocktime is calculated in mark_postcopy_blocktime_end. + * + * + * Assume we have 3 CPU + * + * S1E1 S1 E1 + * -***xxx***> CPU1 + * + * S2E2 + * xxx---> CPU2 + * + * S3E3 + * xxx---> CPU3 + * + * We have sequence S1,S2,E1,S3,S1,E2,E3,E1 + * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't include CPU3 + * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 - + *it's a part of total blocktime. 
+ * S1 - here is last_begin + * Legend of the picture is following: + * * - means blocktime per vCPU + * x - means overlapped blocktime (total blocktime) + * + * @addr: host virtual address + */ +static void mark_postcopy_blocktime_end(uint64_t addr) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int i, affected_cpu = 0; +int64_t now_ms; +bool vcpu_total_blocktime = false; + +if (!dc) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + +/* lookup cpu, to clear it, + * that algorithm looks straighforward, but it's not + * optimal, more optimal algorithm is keeping tree or hash + * where key is address value is a list of */ +for (i = 0; i < smp_cpus; i++) { +uint64_t vcpu_blocktime = 0; + +if (atomic_fetch_add(>vcpu_addr[i], 0) != addr || +atomic_fetch_add(>page_fault_vcpu_time[i], 0) == 0) { +continue; +} +atomic_xchg__nocheck(>vcpu_addr[i], 0); +vcpu_blocktime = now_ms - +
[Qemu-devel] [PATCH v11 2/6] migration: add postcopy blocktime ctx into MigrationIncomingState
This patch adds request to kernel space for UFFD_FEATURE_THREAD_ID, in case when this feature is provided by kernel. PostcopyBlocktimeContext is incapsulated inside postcopy-ram.c, due to it's postcopy only feature. Also it defines PostcopyBlocktimeContext's instance live time. Information from PostcopyBlocktimeContext instance will be provided much after postcopy migration end, instance of PostcopyBlocktimeContext will live till QEMU exit, but part of it (vcpu_addr, page_fault_vcpu_time) used only during calculation, will be released when postcopy ended or failed. To enable postcopy blocktime calculation on destination, need to request proper capabiltiy (Patch for documentation will be at the tail of the patch set). As an example following command enable that capability, assume QEMU was started with -chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock option to control it [root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \ {\"execute\": \"migrate-set-capabilities\" , \"arguments\": { \"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\": true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock Or just with HMP (qemu) migrate_set_capability postcopy-blocktime on Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/migration.h| 8 +++ migration/postcopy-ram.c | 59 2 files changed, 67 insertions(+) diff --git a/migration/migration.h b/migration/migration.h index c12ceba..2bae992 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -22,6 +22,8 @@ #include "hw/qdev.h" #include "io/channel.h" +struct PostcopyBlocktimeContext; + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *from_src_file; @@ -59,6 +61,12 @@ struct MigrationIncomingState { /* The coroutine we should enter (back) after failover */ Coroutine *migration_incoming_co; QemuSemaphore colo_incoming_sem; + +/* + * PostcopyBlocktimeContext to keep information for postcopy + * live migration, to calculate 
vCPU block time + * */ +struct PostcopyBlocktimeContext *blocktime_ctx; }; MigrationIncomingState *migration_incoming_get_current(void); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index bec6c2c..c18ec5a 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -61,6 +61,52 @@ struct PostcopyDiscardState { #include #include +typedef struct PostcopyBlocktimeContext { +/* time when page fault initiated per vCPU */ +int64_t *page_fault_vcpu_time; +/* page address per vCPU */ +uint64_t *vcpu_addr; +int64_t total_blocktime; +/* blocktime per vCPU */ +int64_t *vcpu_blocktime; +/* point in time when last page fault was initiated */ +int64_t last_begin; +/* number of vCPU are suspended */ +int smp_cpus_down; + +/* + * Handler for exit event, necessary for + * releasing whole blocktime_ctx + */ +Notifier exit_notifier; +} PostcopyBlocktimeContext; + +static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx) +{ +g_free(ctx->page_fault_vcpu_time); +g_free(ctx->vcpu_addr); +g_free(ctx->vcpu_blocktime); +g_free(ctx); +} + +static void migration_exit_cb(Notifier *n, void *data) +{ +PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, + exit_notifier); +destroy_blocktime_context(ctx); +} + +static struct PostcopyBlocktimeContext *blocktime_context_new(void) +{ +PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); +ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->vcpu_addr = g_new0(uint64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); + +ctx->exit_notifier.notify = migration_exit_cb; +qemu_add_exit_notifier(>exit_notifier); +return ctx; +} /** * receive_ufd_features: check userfault fd features, to request only supported @@ -153,6 +199,19 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) } } +#ifdef UFFD_FEATURE_THREAD_ID +if (migrate_postcopy_blocktime() && mis && +UFFD_FEATURE_THREAD_ID & supported_features) { +/* kernel supports that 
feature */ +/* don't create blocktime_context if it exists */ +if (!mis->blocktime_ctx) { +mis->blocktime_ctx = blocktime_context_new(); +} + +asked_features |= UFFD_FEATURE_THREAD_ID; +} +#endif + /* * request features, even if asked_features is 0, due to * kernel expects UFFD_API before UFFDIO_REGISTER, per -- 2.7.4
[Qemu-devel] [PATCH v11 0/6] calculate blocktime for postcopy live migration
This is 11th version. The rationale for that idea is following: vCPU could suspend during postcopy live migration until faulted page is not copied into kernel. Downtime on source side it's a value - time interval since source turn vCPU off, till destination start runnig vCPU. But that value was proper value for precopy migration it really shows amount of time when vCPU is down. But not for postcopy migration, because several vCPU threads could susppend after vCPU was started. That is important to estimate packet drop for SDN software. (V11 -> V10) - rebase - update documentation (comment from David) - postcopy_notifier was removed from PostcopyBlocktimeContext (comment from David) - fix "since 2.10" for postcopy-vcpu-blocktime (comment from Eric) - fix order in mark_postcopy_blocktime_begin/end (comment from David), but I think it still have a slim race condition - remove error_report from fill_destination_postcopy_migration_info (comment from David) (V9 -> V10) - rebase - patch "update kernel header for UFFD_FEATURE_*" has changed, and was generated by scripts/update-linux-headers.sh as David suggested. (V8 -> V9) - rebase - traces (V7 -> V8) - just one comma in "migration: fix hardcoded function name in error report" It was really missed, but fixed in futher patch. (V6 -> V7) - copied bitmap was placed into RAMBlock as another migration related bitmaps. - Ordering of mark_postcopy_blocktime_end call and ordering of checking copied bitmap were changed. - linewrap style defects - new patch "postcopy_place_page factoring out" - postcopy_ram_supported_by_host accepts MigrationIncomingState in qmp_migrate_set_capabilities - minor fixes of documentation. and huge description of get_postcopy_total_blocktime was moved. Davids comment. (V5 -> V6) - blocktime was added into hmp command. Comment from David. - bitmap for copied pages was added as well as check in *_begin/_end functions. Patch uses just introduced RAMBLOCK_FOREACH. Comment from David. 
- description of receive_ufd_features/request_ufd_features. Comment from David. - commit message headers/@since references were modified. Comment from Eric. - also typos in documentation. Comment from Eric. - style and description of field in MigrationInfo. Comment from Eric. - ufd_check_and_apply (former ufd_version_check) is calling twice, so my previous patch contained double allocation of blocktime context and as a result memory leak. In this patch series it was fixed. (V4 -> V5) - fill_destination_postcopy_migration_info empty stub was missed for none linux build (V3 -> V4) - get rid of Downtime as a name for vCPU waiting time during postcopy migration - PostcopyBlocktimeContext renamed (it was just BlocktimeContext) - atomic operations are used for dealing with fields of PostcopyBlocktimeContext affected in both threads. - hardcoded function names in error_report were replaced to %s and __line__ - this patch set includes postcopy-downtime capability, but it used on destination, coupled with not possibility to return calculated downtime back to source to show it in query-migrate, it looks like a big trade off - UFFD_API have to be sent notwithstanding need or not to ask kernel for a feature, due to kernel expects it in any case (see patch comment) - postcopy_downtime included into query-migrate output - also this patch set includes trivial fix migration: fix hardcoded function name in error report maybe that is a candidate for qemu-trivial mailing list, but I already sent "migration: Fixed code style" and it was unclaimed. (V2 -> V3) - Downtime calculation approach was changed, thanks to Peter Xu - Due to previous point no more need to keep GTree as well as bitmap of cpus. So glib changes aren't included in this patch set, it could be resent in another patch set, if it will be a good reason for it. - No procfs traces in this patchset, if somebody wants it, you could get it from patchwork site to track down page fault initiators. 
- UFFD_FEATURE_THREAD_ID is requesting only when kernel supports it - It doesn't send back the downtime, just trace it This patch set is based on commit [PATCH v10 0/3] Add bitmap for received pages in postcopy migration Both patch sets were rebased on commit d147f7e815f97cb477e223586bcb80c316ae10ea Alexey Perevalov (6): migration: introduce postcopy-blocktime capability migration: add postcopy blocktime ctx into MigrationIncomingState migration: calculate vCPU blocktime on dst side migration: postcopy_blocktime documentation migration: add blocktime calculation into postcopy-test migration: add postcopy total blocktime into query-migrate docs/devel/migration.txt | 13 +++ hmp.c| 15 +++ migration/migration.c| 51 +
[Qemu-devel] [PATCH v11 4/6] migration: postcopy_blocktime documentation
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- docs/devel/migration.txt | 13 + 1 file changed, 13 insertions(+) diff --git a/docs/devel/migration.txt b/docs/devel/migration.txt index 4030703..cebfe7a 100644 --- a/docs/devel/migration.txt +++ b/docs/devel/migration.txt @@ -402,6 +402,19 @@ will now cause the transition from precopy to postcopy. It can be issued immediately after migration is started or any time later on. Issuing it after the end of a migration is harmless. +Blocktime is a postcopy live migration metric, intended to show +how long the vCPU was in a state of interruptible sleep due to pagefault. +That metric is calculated both for all vCPUs as an overlapped value, and +separately for each vCPU. These values are calculated on the destination side. +To enable postcopy blocktime calculation, enter the following command on the destination +monitor: + +migrate_set_capability postcopy-blocktime on + +Postcopy blocktime can be retrieved by the query-migrate QMP command. +The postcopy-blocktime value of the QMP command will show overlapped blocking time for +all vCPUs, postcopy-vcpu-blocktime will show a list of blocking time per vCPU. + Note: During the postcopy phase, the bandwidth limits set using migrate_set_speed is ignored (to avoid delaying requested pages that the destination is waiting for). -- 2.7.4
[Qemu-devel] [PATCH v11 1/6] migration: introduce postcopy-blocktime capability
Right now it could be used on destination side to enable vCPU blocktime calculation for postcopy live migration. vCPU blocktime - it's time since vCPU thread was put into interruptible sleep, till memory page was copied and thread awake. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/migration.c | 9 + migration/migration.h | 1 + qapi/migration.json | 5 - 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 98429dc..713f070 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1467,6 +1467,15 @@ bool migrate_zero_blocks(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; } +bool migrate_postcopy_blocktime(void) +{ +MigrationState *s; + +s = migrate_get_current(); + +return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; +} + bool migrate_use_compression(void) { MigrationState *s; diff --git a/migration/migration.h b/migration/migration.h index b83ccea..c12ceba 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -193,6 +193,7 @@ int migrate_compress_level(void); int migrate_compress_threads(void); int migrate_decompress_threads(void); bool migrate_use_events(void); +bool migrate_postcopy_blocktime(void); /* Sending on the return path - generic and then for each message type */ void migrate_send_rp_shut(MigrationIncomingState *mis, diff --git a/qapi/migration.json b/qapi/migration.json index f8b365e..0f2af26 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -343,12 +343,15 @@ # # @x-multifd: Use more than one fd for migration (since 2.11) # +# @postcopy-blocktime: Calculate downtime for postcopy live migration +# (since 2.11) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', - 'block', 'return-path', 'x-multifd' ] } + 
'block', 'return-path', 'x-multifd', 'postcopy-blocktime' ] } ## # @MigrationCapabilityStatus: -- 2.7.4
[Qemu-devel] [PATCH v10 3/3] migration: add bitmap for received page
This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, and for recovery after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- include/exec/ram_addr.h | 10 ++ migration/postcopy-ram.c | 17 - migration/ram.c | 40 migration/ram.h | 5 + 4 files changed, 67 insertions(+), 5 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index d017639..6cbc02a 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -47,6 +47,8 @@ struct RAMBlock { * of the postcopy phase */ unsigned long *unsentmap; +/* bitmap of already received pages in postcopy */ +unsigned long *receivedmap; }; static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) @@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) return (char *)block->host + offset; } +static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, +RAMBlock *rb) +{ +uint64_t host_addr_offset = +(uint64_t)(uintptr_t)(host_addr - (void *)rb->host); +return host_addr_offset >> TARGET_PAGE_BITS; +} + long qemu_getrampagesize(void); unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 8bf6432..bec6c2c 100644 --- 
a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -642,22 +642,28 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set_range(rb, host_addr, + pagesize / qemu_target_page_size()); } +return ret; } /* @@ -674,7 +680,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -696,7 +702,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(), +rb)) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); diff --git a/migration/ram.c b/migration/ram.c index 304ac59..c30db15 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -44,6 +44,7 @@ #include "qemu/error-report.h" #include "trace.h" #include "exec/ram_addr.h" +#include "exec/target_page.h" #include "qemu/rcu_queue.h" #include "migration/colo.h" #include "migration/block.h" @@ -148,6 +149,35 @@ out:
[Qemu-devel] [PATCH v10 2/3] migration: introduce qemu_ufd_copy_ioctl helper
Just for placing auxilary operations inside helper, auxilary operations like: track received pages, notify about copying operation in futher patches. Reviewed-by: Juan Quintela <quint...@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 34 +- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index d3073b9..8bf6432 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -641,6 +641,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return 0; } +static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, +void *from_addr, uint64_t pagesize) +{ +if (from_addr) { +struct uffdio_copy copy_struct; +copy_struct.dst = (uint64_t)(uintptr_t)host_addr; +copy_struct.src = (uint64_t)(uintptr_t)from_addr; +copy_struct.len = pagesize; +copy_struct.mode = 0; +return ioctl(userfault_fd, UFFDIO_COPY, _struct); +} else { +struct uffdio_zeropage zero_struct; +zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; +zero_struct.range.len = pagesize; +zero_struct.mode = 0; +return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +} + /* * Place a host page (from) at (host) atomically * returns 0 on success @@ -648,20 +667,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, RAMBlock *rb) { -struct uffdio_copy copy_struct; size_t pagesize = qemu_ram_pagesize(rb); -copy_struct.dst = (uint64_t)(uintptr_t)host; -copy_struct.src = (uint64_t)(uintptr_t)from; -copy_struct.len = pagesize; -copy_struct.mode = 0; - /* copy also acks to the kernel waking the stalled thread up * TODO: We can inhibit that ack and only do it if it was requested * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (ioctl(mis->userfault_fd, UFFDIO_COPY, _struct)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -683,12 +696,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -struct uffdio_zeropage zero_struct; -zero_struct.range.start = (uint64_t)(uintptr_t)host; -zero_struct.range.len = getpagesize(); -zero_struct.mode = 0; - -if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, _struct)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); -- 2.7.4
[Qemu-devel] [PATCH v10 0/3] Add bitmap for received pages in postcopy migration
This is 10th version of [PATCH v1 0/2] Add bitmap for copied pages in postcopy migration cover message from there This is a separate patch set, it derived from https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html There are several possible use cases: 1. solve issue with postcopy live migration and shared memory. OVS-VSWITCH requires information about copied pages, to fallocate newly allocated pages. 2. calculation vCPU blocktime for more details see https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html 3. Recovery after fail in the middle of postcopy migration V10 -> V9 - ramblock_recv_bitmap_clear was removed from patchset, due to I didn't find any usage of it in existing patchsets based on this patch. V8 -> V9 - patch: "migration: fix incorrect postcopy recved_bitmap" from "[RFC 00/29] Migration: postcopy failure recovery" patch set was squashed into the latest patch of this patchset, getpagesize was replaced to qemu_target_page_size, as David suggested. - for the sake of API uniformity semantics of all functions were changed, now RAMBlock *rb is the first argument, as well as in bitmap API. - Also define TARGET_PAGE_BITS was replaced to qemu_target_page_bits in all other places of this patchset, for uniformity and maintenance. V7 -> V8 - removed unnecessary renaming and moving of block variable to ram_load's function scope - ramblock_recv_map_init became static function V6 -> V7 - rebased on [PATCH v7 0/5] Create setup/cleanup methods for migration incoming side - live time of the received map was changed (ram_load_setup/ram_load_cleanup) V5 -> V6 - call ramblock_recv_map_init from migration_fd_process_incoming (Peter suggested)But finalization is still in ram_load_cleanup as Juan suggested. 
V4 -> V5 - remove ramblock_recv_bitmap_clear_range in favor to bitmap_clear (comment from David) - single invocation place for ramblock_recv_bitmap_set (comment from Peter) - minor changes like removing comment from qemu_ufd_copy_ioctl and local variable from ramblock_recv_map_init (comment from Peter) V3 -> V4 - clear_bit instead of ramblock_recv_bitmap_clear in ramblock_recv_bitmap_clear_range, it reduced number of operation (comment from Juan) - for postcopy ramblock_recv_bitmap_set is calling after page was copied, only in case of success (comment from David) - indentation fixes (comment from Juan) V2 -> V3 - ramblock_recv_map_init call is placed into migration_incoming_get_current, looks like it's general place for both precopy and postcopy case. - received bitmap memory releasing is placed into ram_load_cleanup, unfortunatelly, it calls only in case of precopy. - precopy case and discard ram block case - function renaming, and another minor cleanups V1 -> V2 - change in terminology s/copied/received/g - granularity became TARGET_PAGE_SIZE, but not actual page size of the ramblock - movecopiedmap & get_copiedmap_size were removed, until patch set where it will be necessary - releasing memory of receivedmap was added into ram_load_cleanup - new patch "migration: introduce qemu_ufd_copy_ioctl helper" Patchset is based on: commit d147f7e815f97cb477e223586bcb80c316ae10ea Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging Alexey Perevalov (3): migration: postcopy_place_page factoring out migration: introduce qemu_ufd_copy_ioctl helper migration: add bitmap for received page include/exec/ram_addr.h | 10 + migration/postcopy-ram.c | 54 +++- migration/postcopy-ram.h | 4 ++-- migration/ram.c | 44 +-- migration/ram.h | 5 + 5 files changed, 94 insertions(+), 23 deletions(-) -- 2.7.4
[Qemu-devel] [PATCH v10 1/3] migration: postcopy_place_page factoring out
Need to mark copied pages as closer as possible to the place where it tracks down. That will be necessary in futher patch. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Peter Xu <pet...@redhat.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 13 +++-- migration/postcopy-ram.h | 4 ++-- migration/ram.c | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 0de68e8..d3073b9 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -646,9 +646,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize) +RAMBlock *rb) { struct uffdio_copy copy_struct; +size_t pagesize = qemu_ram_pagesize(rb); copy_struct.dst = (uint64_t)(uintptr_t)host; copy_struct.src = (uint64_t)(uintptr_t)from; @@ -677,11 +678,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize) + RAMBlock *rb) { trace_postcopy_place_page_zero(host); -if (pagesize == getpagesize()) { +if (qemu_ram_pagesize(rb) == getpagesize()) { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host; zero_struct.range.len = getpagesize(); @@ -711,7 +712,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); } return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, - pagesize); + rb); } return 0; @@ -774,14 +775,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize) +RAMBlock *rb) { assert(0); return -1; } int 
postcopy_place_page_zero(MigrationIncomingState *mis, void *host, -size_t pagesize) +RAMBlock *rb) { assert(0); return -1; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 587a8b8..77ea0fd 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms, * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize); +RAMBlock *rb); /* * Place a zero page at (host) atomically * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize); + RAMBlock *rb); /* The current postcopy state is read/set by postcopy_state_get/set * which update it atomically. diff --git a/migration/ram.c b/migration/ram.c index b83f897..304ac59 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2680,10 +2680,10 @@ static int ram_load_postcopy(QEMUFile *f) if (all_zero) { ret = postcopy_place_page_zero(mis, place_dest, - block->page_size); + block); } else { ret = postcopy_place_page(mis, place_dest, - place_source, block->page_size); + place_source, block); } } if (!ret) { -- 2.7.4
Re: [Qemu-devel] [PATCH v10 07/10] migration: calculate vCPU blocktime on dst side
On 09/21/2017 02:57 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: This patch provides blocktime calculation per vCPU, as a summary and as a overlapped value for all vCPUs. This approach was suggested by Peter Xu, as an improvements of previous approch where QEMU kept tree with faulted page address and cpus bitmask in it. Now QEMU is keeping array with faulted page address as value and vCPU as index. It helps to find proper vCPU at UFFD_COPY time. Also it keeps list for blocktime per vCPU (could be traced with page_fault_addr) Blocktime will not calculated if postcopy_blocktime field of MigrationIncomingState wasn't initialized. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 138 ++- migration/trace-events | 5 +- 2 files changed, 140 insertions(+), 3 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index cc78981..9a5133f 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -110,7 +110,6 @@ static struct PostcopyBlocktimeContext *blocktime_context_new(void) ctx->exit_notifier.notify = migration_exit_cb; qemu_add_exit_notifier(>exit_notifier); -add_migration_state_change_notifier(>postcopy_notifier); return ctx; } @@ -559,6 +558,136 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, return 0; } +static int get_mem_fault_cpu_index(uint32_t pid) +{ +CPUState *cpu_iter; + +CPU_FOREACH(cpu_iter) { +if (cpu_iter->thread_id == pid) { +trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid); +return cpu_iter->cpu_index; +} +} +trace_get_mem_fault_cpu_index(-1, pid); +return -1; +} + +/* + * This function is being called when pagefault occurs. It + * tracks down vCPU blocking time. 
+ * + * @addr: faulted host virtual address + * @ptid: faulted process thread id + * @rb: ramblock appropriate to addr + */ +static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid, + RAMBlock *rb) +{ +int cpu, already_received; +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int64_t now_ms; + +if (!dc || ptid == 0) { +return; +} +cpu = get_mem_fault_cpu_index(ptid); +if (cpu < 0) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +if (dc->vcpu_addr[cpu] == 0) { +atomic_inc(>smp_cpus_down); +} + +atomic_xchg__nocheck(>vcpu_addr[cpu], addr); +atomic_xchg__nocheck(>last_begin, now_ms); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], now_ms); + +already_received = ramblock_recv_bitmap_test(rb, (void *)addr); +if (already_received) { +atomic_xchg__nocheck(>vcpu_addr[cpu], 0); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], 0); +atomic_sub(>smp_cpus_down, 1); +} +trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu], +cpu, already_received); +} + +/* + * This function just provide calculated blocktime per cpu and trace it. + * Total blocktime is calculated in mark_postcopy_blocktime_end. + * + * + * Assume we have 3 CPU + * + * S1E1 S1 E1 + * -***xxx***> CPU1 + * + * S2E2 + * xxx---> CPU2 + * + * S3E3 + * xxx---> CPU3 + * + * We have sequence S1,S2,E1,S3,S1,E2,E3,E1 + * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't include CPU3 + * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 - + *it's a part of total blocktime. 
+ * S1 - here is last_begin + * Legend of the picture is following: + * * - means blocktime per vCPU + * x - means overlapped blocktime (total blocktime) + * + * @addr: host virtual address + */ +static void mark_postcopy_blocktime_end(uint64_t addr) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int i, affected_cpu = 0; +int64_t now_ms; +bool vcpu_total_blocktime = false; + +if (!dc) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + +/* lookup cpu, to clear it, + * that algorithm looks straighforward, but it's not + * optimal, more optimal algorithm is keeping tree or hash + * where key is address value is a list of */ +for (i = 0; i < smp_cpus; i++) { +uint64_t vcpu_blocktime = 0; +
[Qemu-devel] [PATCH] linux-headers: sync against v4.14-rc1
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- include/standard-headers/asm-x86/hyperv.h| 19 ++--- include/standard-headers/linux/pci_regs.h| 42 include/standard-headers/linux/virtio_ring.h | 4 +-- linux-headers/asm-s390/kvm.h | 6 linux-headers/linux/kvm.h| 3 +- linux-headers/linux/userfaultfd.h| 16 ++- 6 files changed, 64 insertions(+), 26 deletions(-) diff --git a/include/standard-headers/asm-x86/hyperv.h b/include/standard-headers/asm-x86/hyperv.h index fac7651..5f95d5e 100644 --- a/include/standard-headers/asm-x86/hyperv.h +++ b/include/standard-headers/asm-x86/hyperv.h @@ -149,12 +149,9 @@ */ #define HV_X64_DEPRECATING_AEOI_RECOMMENDED(1 << 9) -/* - * HV_VP_SET available - */ +/* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) - /* * Crash notification flag. */ @@ -242,7 +239,11 @@ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) /* Declare the various hypercall operations. */ +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 #define HVCALL_POST_MESSAGE0x005c #define HVCALL_SIGNAL_EVENT0x005d @@ -259,6 +260,16 @@ #define HV_PROCESSOR_POWER_STATE_C22 #define HV_PROCESSOR_POWER_STATE_C33 +#define HV_FLUSH_ALL_PROCESSORSBIT(0) +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACESBIT(1) +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) + +enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARCE_4K, + HV_GENERIC_SET_ALL, +}; + /* hypercall status code */ #define HV_STATUS_SUCCESS 0 #define HV_STATUS_INVALID_HYPERCALL_CODE 2 diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index c22d3eb..f8d5804 100644 --- a/include/standard-headers/linux/pci_regs.h +++ 
b/include/standard-headers/linux/pci_regs.h @@ -513,6 +513,7 @@ #define PCI_EXP_DEVSTA_URD0x0008 /* Unsupported Request Detected */ #define PCI_EXP_DEVSTA_AUXPD 0x0010 /* AUX Power Detected */ #define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */ +#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ #define PCI_EXP_LNKCAP_SLS0x000f /* Supported Link Speeds */ #define PCI_EXP_LNKCAP_SLS_2_5GB 0x0001 /* LNKCAP2 SLS Vector bit 0 */ @@ -556,7 +557,7 @@ #define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ #define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ #define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints end here */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints with link end here */ #define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ #define PCI_EXP_SLTCAP_ABP0x0001 /* Attention Button Present */ #define PCI_EXP_SLTCAP_PCP0x0002 /* Power Controller Present */ @@ -639,7 +640,7 @@ #define PCI_EXP_DEVCTL2_OBFF_MSGB_EN 0x4000 /* Enable OBFF Message type B */ #define PCI_EXP_DEVCTL2_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ #define PCI_EXP_DEVSTA242 /* Device Status 2 */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints end here */ +#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints without link end here */ #define PCI_EXP_LNKCAP244 /* Link Capabilities 2 */ #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x0002 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x0004 /* Supported Speed 5.0GT/s */ @@ -647,6 +648,7 @@ #define PCI_EXP_LNKCAP2_CROSSLINK 0x0100 /* Crosslink supported */ #define PCI_EXP_LNKCTL248 /* Link Control 2 */ #define PCI_EXP_LNKSTA250 /* Link Status 2 */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ #define PCI_EXP_SLTCAP252 /* Slot Capabilities 2 */ #define 
PCI_EXP_SLTCTL256 /* Slot Control 2 */ #define PCI_EXP_SLTSTA258 /* Slot Status 2 */ @@ -733,23 +735,17 @@ #define PCI_ERR_CAP_ECRC_CHKE 0x0100 /* ECRC Check Enable */ #define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ #define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ -/* Correctable Err Reporting Enable */ -#define
[Qemu-devel] [PATCH] linux-headers: sync against v4.14-rc1
This patch contains modification of userfaultfd.h, necessary for the series "calculate blocktime for postcopy live migration"; it was decided to send it separately with other modifications. Build was tested with docker, but it's not fully tested at runtime. Based on a664607440511fdf8cff9d1c2afefbdbca1d1295 "Merge remote-tracking branch 'remotes/famz/tags/build-and-test-automation-pull-request' into staging" Alexey Perevalov (1): linux-headers: sync against v4.14-rc1 include/standard-headers/asm-x86/hyperv.h| 19 ++--- include/standard-headers/linux/pci_regs.h| 42 include/standard-headers/linux/virtio_ring.h | 4 +-- linux-headers/asm-s390/kvm.h | 6 linux-headers/linux/kvm.h| 3 +- linux-headers/linux/userfaultfd.h| 16 ++- 6 files changed, 64 insertions(+), 26 deletions(-) -- 1.9.1
Re: [Qemu-devel] [PATCH v10 07/10] migration: calculate vCPU blocktime on dst side
On 09/21/2017 02:57 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: This patch provides blocktime calculation per vCPU, as a summary and as a overlapped value for all vCPUs. This approach was suggested by Peter Xu, as an improvements of previous approch where QEMU kept tree with faulted page address and cpus bitmask in it. Now QEMU is keeping array with faulted page address as value and vCPU as index. It helps to find proper vCPU at UFFD_COPY time. Also it keeps list for blocktime per vCPU (could be traced with page_fault_addr) Blocktime will not calculated if postcopy_blocktime field of MigrationIncomingState wasn't initialized. Signed-off-by: Alexey Perevalov<a.pereva...@samsung.com> --- migration/postcopy-ram.c | 138 ++- migration/trace-events | 5 +- 2 files changed, 140 insertions(+), 3 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index cc78981..9a5133f 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -110,7 +110,6 @@ static struct PostcopyBlocktimeContext *blocktime_context_new(void) ctx->exit_notifier.notify = migration_exit_cb; qemu_add_exit_notifier(>exit_notifier); -add_migration_state_change_notifier(>postcopy_notifier); return ctx; } @@ -559,6 +558,136 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, return 0; } +static int get_mem_fault_cpu_index(uint32_t pid) +{ +CPUState *cpu_iter; + +CPU_FOREACH(cpu_iter) { +if (cpu_iter->thread_id == pid) { +trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid); +return cpu_iter->cpu_index; +} +} +trace_get_mem_fault_cpu_index(-1, pid); +return -1; +} + +/* + * This function is being called when pagefault occurs. It + * tracks down vCPU blocking time. 
+ * + * @addr: faulted host virtual address + * @ptid: faulted process thread id + * @rb: ramblock appropriate to addr + */ +static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid, + RAMBlock *rb) +{ +int cpu, already_received; +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int64_t now_ms; + +if (!dc || ptid == 0) { +return; +} +cpu = get_mem_fault_cpu_index(ptid); +if (cpu < 0) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +if (dc->vcpu_addr[cpu] == 0) { +atomic_inc(>smp_cpus_down); +} + +atomic_xchg__nocheck(>vcpu_addr[cpu], addr); +atomic_xchg__nocheck(>last_begin, now_ms); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], now_ms); + +already_received = ramblock_recv_bitmap_test(rb, (void *)addr); +if (already_received) { +atomic_xchg__nocheck(>vcpu_addr[cpu], 0); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], 0); +atomic_sub(>smp_cpus_down, 1); +} +trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu], +cpu, already_received); +} + +/* + * This function just provide calculated blocktime per cpu and trace it. + * Total blocktime is calculated in mark_postcopy_blocktime_end. + * + * + * Assume we have 3 CPU + * + * S1E1 S1 E1 + * -***xxx***> CPU1 + * + * S2E2 + * xxx---> CPU2 + * + * S3E3 + * xxx---> CPU3 + * + * We have sequence S1,S2,E1,S3,S1,E2,E3,E1 + * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't include CPU3 + * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 - + *it's a part of total blocktime. 
+ * S1 - here is last_begin + * Legend of the picture is following: + * * - means blocktime per vCPU + * x - means overlapped blocktime (total blocktime) + * + * @addr: host virtual address + */ +static void mark_postcopy_blocktime_end(uint64_t addr) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int i, affected_cpu = 0; +int64_t now_ms; +bool vcpu_total_blocktime = false; + +if (!dc) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + +/* lookup cpu, to clear it, + * that algorithm looks straighforward, but it's not + * optimal, more optimal algorithm is keeping tree or hash + * where key is address value is a list of */ +for (i = 0; i < smp_cpus; i++) { +uint64_t vcpu_blocktime = 0; +
Re: [Qemu-devel] [PATCH v10 10/10] migration: add postcopy total blocktime into query-migrate
On 09/21/2017 03:42 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: Postcopy total blocktime is available on destination side only. But query-migrate was possible only for source. This patch adds ability to call query-migrate on destination. To be able to see postcopy blocktime, need to request postcopy-blocktime capability. The query-migrate command will show following sample result: {"return": "postcopy-vcpu-blocktime": [115, 100], "status": "completed", "postcopy-blocktime": 100 }} postcopy_vcpu_blocktime contains list, where the first item is the first vCPU in QEMU. This patch has a drawback, it combines states of incoming and outgoing migration. Ongoing migration state will overwrite incoming state. Looks like better to separate query-migrate for incoming and outgoing migration or add parameter to indicate type of migration. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- hmp.c| 15 + migration/migration.c| 42 +++ migration/migration.h| 4 migration/postcopy-ram.c | 57 migration/trace-events | 1 + qapi/migration.json | 11 +- 6 files changed, 125 insertions(+), 5 deletions(-) diff --git a/hmp.c b/hmp.c index 0fb2bc7..142f76e 100644 --- a/hmp.c +++ b/hmp.c @@ -264,6 +264,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->cpu_throttle_percentage); } +if (info->has_postcopy_blocktime) { +monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", + info->postcopy_blocktime); +} + +if (info->has_postcopy_vcpu_blocktime) { +Visitor *v; +char *str; +v = string_output_visitor_new(false, ); +visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_complete(v, ); +monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); +g_free(str); +visit_free(v); +} qapi_free_MigrationInfo(info); qapi_free_MigrationCapabilityStatusList(caps); } diff --git a/migration/migration.c b/migration/migration.c index 4f029e8..e1d3248 100644 --- 
a/migration/migration.c +++ b/migration/migration.c @@ -528,14 +528,15 @@ static void populate_disk_info(MigrationInfo *info) } } -MigrationInfo *qmp_query_migrate(Error **errp) +static void fill_source_migration_info(MigrationInfo *info) { -MigrationInfo *info = g_malloc0(sizeof(*info)); MigrationState *s = migrate_get_current(); switch (s->state) { case MIGRATION_STATUS_NONE: /* no migration has happened ever */ +/* do not overwrite destination migration status */ +return; break; case MIGRATION_STATUS_SETUP: info->has_status = true; @@ -584,8 +585,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) break; } info->status = s->state; - -return info; } /** @@ -649,6 +648,41 @@ static bool migrate_caps_check(bool *cap_list, return true; } +static void fill_destination_migration_info(MigrationInfo *info) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); + +switch (mis->state) { +case MIGRATION_STATUS_NONE: +return; +break; +case MIGRATION_STATUS_SETUP: +case MIGRATION_STATUS_CANCELLING: +case MIGRATION_STATUS_CANCELLED: +case MIGRATION_STATUS_ACTIVE: +case MIGRATION_STATUS_POSTCOPY_ACTIVE: +case MIGRATION_STATUS_FAILED: +case MIGRATION_STATUS_COLO: +info->has_status = true; +break; +case MIGRATION_STATUS_COMPLETED: +info->has_status = true; +fill_destination_postcopy_migration_info(info); +break; +} +info->status = mis->state; +} + +MigrationInfo *qmp_query_migrate(Error **errp) +{ +MigrationInfo *info = g_malloc0(sizeof(*info)); + +fill_destination_migration_info(info); +fill_source_migration_info(info); + +return info; +} + void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, Error **errp) { diff --git a/migration/migration.h b/migration/migration.h index 770466b..882a59b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -70,6 +70,10 @@ struct MigrationIncomingState { MigrationIncomingState *migration_incoming_get_current(void); void migration_incoming_state_destroy(void); +/* + * Functions to work with blocktime 
context + */ +void fill_destination_postcopy_migration_info(MigrationInfo *info); #define TYPE_MIGRATION "migration" diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 9
Re: [Qemu-devel] [PATCH v10 08/10] migration: postcopy_blocktime documentation
On 09/21/2017 03:33 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> Although it does have my R-b it might be worth adding some clarification that it's a measure of when *all* cpus are blocked and so isn't a total measure of impact of postcopy (when blocking some of them). yes, maybe additional clarification is necessary. now we have both values: {"return": {"postcopy-blocktime": 5691, "status": "completed", "postcopy-vcpu-blocktime": [7671, 6388]}} where postcopy-blocktime is for *all* and postcopy-vcpu-blocktime is per vCPU, it's really worth to describe it, like: Blocktime is a postcopy live migration metric, intended to show how long the vCPU was in state of interruptible sleep due to pagefault. That metric is calculated both for all vCPUs as overlapped value, and separately for each vCPU. These values are calculated on destination side. To enable postcopy blocktime calculation, enter following command on destination monitor: migrate_set_capability postcopy-blocktime on Postcopy blocktime can be retrieved by query-migrate qmp command. postcopy-blocktime value of qmp command will show overlapped blocking time for all vCPU, postcopy-vcpu-blocktime will show list of blocking time per vCPU. -- Best regards, Alexey Perevalov Dave --- docs/devel/migration.txt | 10 ++ 1 file changed, 10 insertions(+) diff --git a/docs/devel/migration.txt b/docs/devel/migration.txt index 1b940a8..4b625ca 100644 --- a/docs/devel/migration.txt +++ b/docs/devel/migration.txt @@ -402,6 +402,16 @@ will now cause the transition from precopy to postcopy. It can be issued immediately after migration is started or any time later on. Issuing it after the end of a migration is harmless. +Blocktime is a postcopy live migration metric, intended to show +how long the vCPU was in state of interruptable sleep due to pagefault. 
+This value is calculated on destination side. +To enable postcopy blocktime calculation, enter following command on destination +monitor: + +migrate_set_capability postcopy-blocktime on + +Postcopy blocktime can be retrieved by query-migrate qmp command. + Note: During the postcopy phase, the bandwidth limits set using migrate_set_speed is ignored (to avoid delaying requested pages that the destination is waiting for). -- 1.9.1 -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
Re: [Qemu-devel] [PATCH v10 06/10] migration: add postcopy blocktime ctx into MigrationIncomingState
On 09/21/2017 01:16 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: This patch adds request to kernel space for UFFD_FEATURE_THREAD_ID, in case when this feature is provided by kernel. PostcopyBlocktimeContext is incapsulated inside postcopy-ram.c, due to it's postcopy only feature. Also it defines PostcopyBlocktimeContext's instance live time. Information from PostcopyBlocktimeContext instance will be provided much after postcopy migration end, instance of PostcopyBlocktimeContext will live till QEMU exit, but part of it (vcpu_addr, page_fault_vcpu_time) used only during calculation, will be released when postcopy ended or failed. To enable postcopy blocktime calculation on destination, need to request proper capabiltiy (Patch for documentation will be at the tail of the patch set). As an example following command enable that capability, assume QEMU was started with -chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock option to control it [root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \ {\"execute\": \"migrate-set-capabilities\" , \"arguments\": { \"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\": true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock Or just with HMP (qemu) migrate_set_capability postcopy-blocktime on Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/migration.h| 8 ++ migration/postcopy-ram.c | 65 2 files changed, 73 insertions(+) diff --git a/migration/migration.h b/migration/migration.h index 56bf33c..770466b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -21,6 +21,8 @@ #include "qemu/coroutine_int.h" #include "hw/qdev.h" +struct PostcopyBlocktimeContext; + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *from_src_file; @@ -58,6 +60,12 @@ struct MigrationIncomingState { /* The coroutine we should enter (back) after failover */ Coroutine *migration_incoming_co; QemuSemaphore 
colo_incoming_sem; + +/* + * PostcopyBlocktimeContext to keep information for postcopy + * live migration, to calculate vCPU block time + * */ +struct PostcopyBlocktimeContext *blocktime_ctx; }; MigrationIncomingState *migration_incoming_get_current(void); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index bec6c2c..cc78981 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -61,6 +61,58 @@ struct PostcopyDiscardState { #include #include +typedef struct PostcopyBlocktimeContext { +/* time when page fault initiated per vCPU */ +int64_t *page_fault_vcpu_time; +/* page address per vCPU */ +uint64_t *vcpu_addr; +int64_t total_blocktime; +/* blocktime per vCPU */ +int64_t *vcpu_blocktime; +/* point in time when last page fault was initiated */ +int64_t last_begin; +/* number of vCPU are suspended */ +int smp_cpus_down; + +/* + * Handler for exit event, necessary for + * releasing whole blocktime_ctx + */ +Notifier exit_notifier; +/* + * Handler for postcopy event, necessary for + * releasing unnecessary part of blocktime_ctx + */ +Notifier postcopy_notifier; Is this actually used? It's just that... 
+} PostcopyBlocktimeContext; + +static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx) +{ +g_free(ctx->page_fault_vcpu_time); +g_free(ctx->vcpu_addr); +g_free(ctx->vcpu_blocktime); +g_free(ctx); +} + +static void migration_exit_cb(Notifier *n, void *data) +{ +PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, + exit_notifier); +destroy_blocktime_context(ctx); +} + +static struct PostcopyBlocktimeContext *blocktime_context_new(void) +{ +PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); +ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->vcpu_addr = g_new0(uint64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); + +ctx->exit_notifier.notify = migration_exit_cb; +qemu_add_exit_notifier(>exit_notifier); +add_migration_state_change_notifier(>postcopy_notifier); Patch 7 removes that line, and I don't see what puts it back; and this line doesn't actually set up ctx->postcopy_notifier. Other than that, it looks OK. Thank you, I really changed my mind, and decided to keep blocktime context (and all calculated values) till the stop of VM, but not till the end of migration. -- Best regards, Alexey Perevalov Dave +return ctx; +} /** * receive_ufd_features: check userfault fd features, to request only supported @@ -153,6 +205,19 @@ static bool uf
Re: [Qemu-devel] [PATCH v10 01/10] userfault: update kernel header for UFFD_FEATURE_*
On 09/20/2017 09:43 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: This commit adds modification for UFFD_FEATURE_SIGBUS and UFFD_FEATURE_THREAD_ID. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> This should be replaced with just running the scripts/update-linux-headers.sh against a 4.14-rc1 checkout. That can be done as a separate patch or the first patch of this series. Ok, in case of separate patch it's reasonably to send modification for all headers. -- Best regards, Alexey Perevalov Dave --- linux-headers/linux/userfaultfd.h | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 9701772..b43cf0d 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -23,7 +23,9 @@ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ - UFFD_FEATURE_MISSING_SHMEM) + UFFD_FEATURE_MISSING_SHMEM | \ + UFFD_FEATURE_SIGBUS |\ + UFFD_FEATURE_THREAD_ID) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -78,6 +80,9 @@ struct uffd_msg { struct { __u64 flags; __u64 address; + union { + __u32 ptid; + } feat; } pagefault; struct { @@ -153,6 +158,13 @@ struct uffdio_api { * UFFD_FEATURE_MISSING_SHMEM works the same as * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem * (i.e. tmpfs and other shmem based APIs). +* +* UFFD_FEATURE_SIGBUS feature means no page-fault +* (UFFD_EVENT_PAGEFAULT) event will be delivered, instead +* a SIGBUS signal will be sent to the faulting process. +* +* UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will +* be returned, if feature is not requested 0 will be returned. 
*/ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP(1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -161,6 +173,8 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_HUGETLBFS(1<<4) #define UFFD_FEATURE_MISSING_SHMEM(1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) +#define UFFD_FEATURE_SIGBUS(1<<7) +#define UFFD_FEATURE_THREAD_ID (1<<8) __u64 features; __u64 ioctls; -- 1.9.1 -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
[Qemu-devel] [PATCH v10 10/10] migration: add postcopy total blocktime into query-migrate
Postcopy total blocktime is available on destination side only. But query-migrate was possible only for source. This patch adds ability to call query-migrate on destination. To be able to see postcopy blocktime, need to request postcopy-blocktime capability. The query-migrate command will show following sample result: {"return": "postcopy-vcpu-blocktime": [115, 100], "status": "completed", "postcopy-blocktime": 100 }} postcopy_vcpu_blocktime contains list, where the first item is the first vCPU in QEMU. This patch has a drawback, it combines states of incoming and outgoing migration. Ongoing migration state will overwrite incoming state. Looks like better to separate query-migrate for incoming and outgoing migration or add parameter to indicate type of migration. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- hmp.c| 15 + migration/migration.c| 42 +++ migration/migration.h| 4 migration/postcopy-ram.c | 57 migration/trace-events | 1 + qapi/migration.json | 11 +- 6 files changed, 125 insertions(+), 5 deletions(-) diff --git a/hmp.c b/hmp.c index 0fb2bc7..142f76e 100644 --- a/hmp.c +++ b/hmp.c @@ -264,6 +264,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->cpu_throttle_percentage); } +if (info->has_postcopy_blocktime) { +monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n", + info->postcopy_blocktime); +} + +if (info->has_postcopy_vcpu_blocktime) { +Visitor *v; +char *str; +v = string_output_visitor_new(false, ); +visit_type_int64List(v, NULL, >postcopy_vcpu_blocktime, NULL); +visit_complete(v, ); +monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); +g_free(str); +visit_free(v); +} qapi_free_MigrationInfo(info); qapi_free_MigrationCapabilityStatusList(caps); } diff --git a/migration/migration.c b/migration/migration.c index 4f029e8..e1d3248 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -528,14 +528,15 @@ static void populate_disk_info(MigrationInfo 
*info) } } -MigrationInfo *qmp_query_migrate(Error **errp) +static void fill_source_migration_info(MigrationInfo *info) { -MigrationInfo *info = g_malloc0(sizeof(*info)); MigrationState *s = migrate_get_current(); switch (s->state) { case MIGRATION_STATUS_NONE: /* no migration has happened ever */ +/* do not overwrite destination migration status */ +return; break; case MIGRATION_STATUS_SETUP: info->has_status = true; @@ -584,8 +585,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) break; } info->status = s->state; - -return info; } /** @@ -649,6 +648,41 @@ static bool migrate_caps_check(bool *cap_list, return true; } +static void fill_destination_migration_info(MigrationInfo *info) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); + +switch (mis->state) { +case MIGRATION_STATUS_NONE: +return; +break; +case MIGRATION_STATUS_SETUP: +case MIGRATION_STATUS_CANCELLING: +case MIGRATION_STATUS_CANCELLED: +case MIGRATION_STATUS_ACTIVE: +case MIGRATION_STATUS_POSTCOPY_ACTIVE: +case MIGRATION_STATUS_FAILED: +case MIGRATION_STATUS_COLO: +info->has_status = true; +break; +case MIGRATION_STATUS_COMPLETED: +info->has_status = true; +fill_destination_postcopy_migration_info(info); +break; +} +info->status = mis->state; +} + +MigrationInfo *qmp_query_migrate(Error **errp) +{ +MigrationInfo *info = g_malloc0(sizeof(*info)); + +fill_destination_migration_info(info); +fill_source_migration_info(info); + +return info; +} + void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, Error **errp) { diff --git a/migration/migration.h b/migration/migration.h index 770466b..882a59b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -70,6 +70,10 @@ struct MigrationIncomingState { MigrationIncomingState *migration_incoming_get_current(void); void migration_incoming_state_destroy(void); +/* + * Functions to work with blocktime context + */ +void fill_destination_postcopy_migration_info(MigrationInfo *info); #define TYPE_MIGRATION "migration" 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 9a5133f..5fdbf1e 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -113,6 +113,55 @@ static struct PostcopyBlocktimeContext *blocktime_context_new(void) re
[Qemu-devel] [PATCH v10 05/10] migration: introduce postcopy-blocktime capability
Right now it could be used on destination side to enable vCPU blocktime calculation for postcopy live migration. vCPU blocktime - it's time since vCPU thread was put into interruptible sleep, till memory page was copied and thread awake. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/migration.c | 9 + migration/migration.h | 1 + qapi/migration.json | 5 - 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index e820d47..4f029e8 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1380,6 +1380,15 @@ bool migrate_zero_blocks(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; } +bool migrate_postcopy_blocktime(void) +{ +MigrationState *s; + +s = migrate_get_current(); + +return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; +} + bool migrate_use_compression(void) { MigrationState *s; diff --git a/migration/migration.h b/migration/migration.h index 148c9fa..56bf33c 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -184,6 +184,7 @@ int migrate_compress_level(void); int migrate_compress_threads(void); int migrate_decompress_threads(void); bool migrate_use_events(void); +bool migrate_postcopy_blocktime(void); /* Sending on the return path - generic and then for each message type */ void migrate_send_rp_shut(MigrationIncomingState *mis, diff --git a/qapi/migration.json b/qapi/migration.json index ee2b3b8..2e4a15d 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -341,12 +341,15 @@ # @return-path: If enabled, migration will use the return path even # for precopy. 
(since 2.10) # +# @postcopy-blocktime: Calculate downtime for postcopy live migration +# (since 2.11) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', - 'block', 'return-path' ] } + 'block', 'return-path', 'postcopy-blocktime' ] } ## # @MigrationCapabilityStatus: -- 1.9.1
[Qemu-devel] [PATCH v10 02/10] migration: pass MigrationIncomingState* into migration check functions
That tiny refactoring is necessary to be able to set UFFD_FEATURE_THREAD_ID while requesting features, and then to create downtime context in case when kernel supports it. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/migration.c| 3 ++- migration/postcopy-ram.c | 10 +- migration/postcopy-ram.h | 2 +- migration/savevm.c | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 959e8ec..e820d47 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -603,6 +603,7 @@ static bool migrate_caps_check(bool *cap_list, { MigrationCapabilityStatusList *cap; bool old_postcopy_cap; +MigrationIncomingState *mis = migration_incoming_get_current(); old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; @@ -636,7 +637,7 @@ static bool migrate_caps_check(bool *cap_list, * special support. */ if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && -!postcopy_ram_supported_by_host()) { +!postcopy_ram_supported_by_host(mis)) { /* postcopy_ram_supported_by_host will have emitted a more * detailed message */ diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7a414eb..4350dd0 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -61,7 +61,7 @@ struct PostcopyDiscardState { #include #include -static bool ufd_version_check(int ufd) +static bool ufd_version_check(int ufd, MigrationIncomingState *mis) { struct uffdio_api api_struct; uint64_t ioctl_mask; @@ -124,7 +124,7 @@ static int test_ramblock_postcopiable(const char *block_name, void *host_addr, * normally fine since if the postcopy succeeds it gets turned back on at the * end. 
*/ -bool postcopy_ram_supported_by_host(void) +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) { long pagesize = getpagesize(); int ufd = -1; @@ -147,7 +147,7 @@ bool postcopy_ram_supported_by_host(void) } /* Version and features check */ -if (!ufd_version_check(ufd)) { +if (!ufd_version_check(ufd, mis)) { goto out; } @@ -523,7 +523,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * Although the host check already tested the API, we need to * do the check again as an ABI handshake on the new fd. */ -if (!ufd_version_check(mis->userfault_fd)) { +if (!ufd_version_check(mis->userfault_fd, mis)) { return -1; } @@ -677,7 +677,7 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis) #else /* No target OS support, stubs just fail */ -bool postcopy_ram_supported_by_host(void) +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) { error_report("%s: No OS support", __func__); return false; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 78a3591..77ea0fd 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -14,7 +14,7 @@ #define QEMU_POSTCOPY_RAM_H /* Return true if the host supports everything we need to do postcopy-ram */ -bool postcopy_ram_supported_by_host(void); +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); /* * Make all of RAM sensitive to accesses to areas that haven't yet been written diff --git a/migration/savevm.c b/migration/savevm.c index 7a55023..6ed6d57 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1352,7 +1352,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) return -1; } -if (!postcopy_ram_supported_by_host()) { +if (!postcopy_ram_supported_by_host(mis)) { postcopy_state_set(POSTCOPY_INCOMING_NONE); return -1; } -- 1.9.1
[Qemu-devel] [PATCH v10 01/10] userfault: update kernel header for UFFD_FEATURE_*
This commit adds modification for UFFD_FEATURE_SIGBUS and UFFD_FEATURE_THREAD_ID. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- linux-headers/linux/userfaultfd.h | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 9701772..b43cf0d 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -23,7 +23,9 @@ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ - UFFD_FEATURE_MISSING_SHMEM) + UFFD_FEATURE_MISSING_SHMEM | \ + UFFD_FEATURE_SIGBUS |\ + UFFD_FEATURE_THREAD_ID) #define UFFD_API_IOCTLS\ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -78,6 +80,9 @@ struct uffd_msg { struct { __u64 flags; __u64 address; + union { + __u32 ptid; + } feat; } pagefault; struct { @@ -153,6 +158,13 @@ struct uffdio_api { * UFFD_FEATURE_MISSING_SHMEM works the same as * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem * (i.e. tmpfs and other shmem based APIs). +* +* UFFD_FEATURE_SIGBUS feature means no page-fault +* (UFFD_EVENT_PAGEFAULT) event will be delivered, instead +* a SIGBUS signal will be sent to the faulting process. +* +* UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will +* be returned, if feature is not requested 0 will be returned. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK(1<<1) @@ -161,6 +173,8 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) +#define UFFD_FEATURE_SIGBUS(1<<7) +#define UFFD_FEATURE_THREAD_ID (1<<8) __u64 features; __u64 ioctls; -- 1.9.1
[Qemu-devel] [PATCH v10 03/10] migration: fix hardcoded function name in error report
Reviewed-by: Juan Quintela <quint...@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 4350dd0..a0e74db 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -69,7 +69,7 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis) api_struct.api = UFFD_API; api_struct.features = 0; if (ioctl(ufd, UFFDIO_API, _struct)) { -error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s", +error_report("%s: UFFDIO_API failed: %s", __func__, strerror(errno)); return false; } -- 1.9.1
[Qemu-devel] [PATCH v10 07/10] migration: calculate vCPU blocktime on dst side
This patch provides blocktime calculation per vCPU, as a summary and as a overlapped value for all vCPUs. This approach was suggested by Peter Xu, as an improvements of previous approch where QEMU kept tree with faulted page address and cpus bitmask in it. Now QEMU is keeping array with faulted page address as value and vCPU as index. It helps to find proper vCPU at UFFD_COPY time. Also it keeps list for blocktime per vCPU (could be traced with page_fault_addr) Blocktime will not calculated if postcopy_blocktime field of MigrationIncomingState wasn't initialized. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 138 ++- migration/trace-events | 5 +- 2 files changed, 140 insertions(+), 3 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index cc78981..9a5133f 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -110,7 +110,6 @@ static struct PostcopyBlocktimeContext *blocktime_context_new(void) ctx->exit_notifier.notify = migration_exit_cb; qemu_add_exit_notifier(>exit_notifier); -add_migration_state_change_notifier(>postcopy_notifier); return ctx; } @@ -559,6 +558,136 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, return 0; } +static int get_mem_fault_cpu_index(uint32_t pid) +{ +CPUState *cpu_iter; + +CPU_FOREACH(cpu_iter) { +if (cpu_iter->thread_id == pid) { +trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid); +return cpu_iter->cpu_index; +} +} +trace_get_mem_fault_cpu_index(-1, pid); +return -1; +} + +/* + * This function is being called when pagefault occurs. It + * tracks down vCPU blocking time. 
+ * + * @addr: faulted host virtual address + * @ptid: faulted process thread id + * @rb: ramblock appropriate to addr + */ +static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid, + RAMBlock *rb) +{ +int cpu, already_received; +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int64_t now_ms; + +if (!dc || ptid == 0) { +return; +} +cpu = get_mem_fault_cpu_index(ptid); +if (cpu < 0) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +if (dc->vcpu_addr[cpu] == 0) { +atomic_inc(>smp_cpus_down); +} + +atomic_xchg__nocheck(>vcpu_addr[cpu], addr); +atomic_xchg__nocheck(>last_begin, now_ms); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], now_ms); + +already_received = ramblock_recv_bitmap_test(rb, (void *)addr); +if (already_received) { +atomic_xchg__nocheck(>vcpu_addr[cpu], 0); +atomic_xchg__nocheck(>page_fault_vcpu_time[cpu], 0); +atomic_sub(>smp_cpus_down, 1); +} +trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu], +cpu, already_received); +} + +/* + * This function just provide calculated blocktime per cpu and trace it. + * Total blocktime is calculated in mark_postcopy_blocktime_end. + * + * + * Assume we have 3 CPU + * + * S1E1 S1 E1 + * -***xxx***> CPU1 + * + * S2E2 + * xxx---> CPU2 + * + * S3E3 + * xxx---> CPU3 + * + * We have sequence S1,S2,E1,S3,S1,E2,E3,E1 + * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't include CPU3 + * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 - + *it's a part of total blocktime. 
+ * S1 - here is last_begin + * Legend of the picture is following: + * * - means blocktime per vCPU + * x - means overlapped blocktime (total blocktime) + * + * @addr: host virtual address + */ +static void mark_postcopy_blocktime_end(uint64_t addr) +{ +MigrationIncomingState *mis = migration_incoming_get_current(); +PostcopyBlocktimeContext *dc = mis->blocktime_ctx; +int i, affected_cpu = 0; +int64_t now_ms; +bool vcpu_total_blocktime = false; + +if (!dc) { +return; +} + +now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + +/* lookup cpu, to clear it, + * that algorithm looks straighforward, but it's not + * optimal, more optimal algorithm is keeping tree or hash + * where key is address value is a list of */ +for (i = 0; i < smp_cpus; i++) { +uint64_t vcpu_blocktime = 0; +if (atomic_fetch_add(>vcpu_addr[i], 0) != addr) { +continue; +} +atomic_xchg__nocheck(>vcpu
[Qemu-devel] [PATCH v10 04/10] migration: split ufd_version_check onto receive/request features part
This modification is necessary for userfault fd features which are required to be requested from userspace. UFFD_FEATURE_THREAD_ID is a one of such "on demand" feature, which will be introduced in the next patch. QEMU have to use separate userfault file descriptor, due to userfault context has internal state, and after first call of ioctl UFFD_API it changes its state to UFFD_STATE_RUNNING (in case of success), but kernel while handling ioctl UFFD_API expects UFFD_STATE_WAIT_API. So only one ioctl with UFFD_API is possible per ufd. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 94 1 file changed, 88 insertions(+), 6 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index a0e74db..bec6c2c 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -61,16 +61,67 @@ struct PostcopyDiscardState { #include #include -static bool ufd_version_check(int ufd, MigrationIncomingState *mis) + +/** + * receive_ufd_features: check userfault fd features, to request only supported + * features in the future. 
+ * + * Returns: true on success + * + * __NR_userfaultfd - should be checked before + * @features: out parameter will contain uffdio_api.features provided by kernel + * in case of success + */ +static bool receive_ufd_features(uint64_t *features) { -struct uffdio_api api_struct; -uint64_t ioctl_mask; +struct uffdio_api api_struct = {0}; +int ufd; +bool ret = true; + +/* if we are here __NR_userfaultfd should exists */ +ufd = syscall(__NR_userfaultfd, O_CLOEXEC); +if (ufd == -1) { +error_report("%s: syscall __NR_userfaultfd failed: %s", __func__, + strerror(errno)); +return false; +} +/* ask features */ api_struct.api = UFFD_API; api_struct.features = 0; if (ioctl(ufd, UFFDIO_API, _struct)) { error_report("%s: UFFDIO_API failed: %s", __func__, strerror(errno)); +ret = false; +goto release_ufd; +} + +*features = api_struct.features; + +release_ufd: +close(ufd); +return ret; +} + +/** + * request_ufd_features: this function should be called only once on a newly + * opened ufd, subsequent calls will lead to error. 
+ * + * Returns: true on succes + * + * @ufd: fd obtained from userfaultfd syscall + * @features: bit mask see UFFD_API_FEATURES + */ +static bool request_ufd_features(int ufd, uint64_t features) +{ +struct uffdio_api api_struct = {0}; +uint64_t ioctl_mask; + +api_struct.api = UFFD_API; +api_struct.features = features; +if (ioctl(ufd, UFFDIO_API, _struct)) { +error_report("%s failed: UFFDIO_API failed: %s", __func__, + strerror(errno)); return false; } @@ -82,11 +133,42 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis) return false; } +return true; +} + +static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) +{ +uint64_t asked_features = 0; +static uint64_t supported_features; + +/* + * it's not possible to + * request UFFD_API twice per one fd + * userfault fd features is persistent + */ +if (!supported_features) { +if (!receive_ufd_features(_features)) { +error_report("%s failed", __func__); +return false; +} +} + +/* + * request features, even if asked_features is 0, due to + * kernel expects UFFD_API before UFFDIO_REGISTER, per + * userfault file descriptor + */ +if (!request_ufd_features(ufd, asked_features)) { +error_report("%s failed: features %" PRIu64, __func__, + asked_features); +return false; +} + if (getpagesize() != ram_pagesize_summary()) { bool have_hp = false; /* We've got a huge page */ #ifdef UFFD_FEATURE_MISSING_HUGETLBFS -have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS; +have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS; #endif if (!have_hp) { error_report("Userfault on this host does not support huge pages"); @@ -147,7 +229,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) } /* Version and features check */ -if (!ufd_version_check(ufd, mis)) { +if (!ufd_check_and_apply(ufd, mis)) { goto out; } @@ -523,7 +605,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * Although the host check already tested the API, we need to * do the check again as an ABI 
handshake on the new fd. */ -if (!ufd_version_check(mis->userfault_fd, mis)) { +if (!ufd_check_and_apply(mis->userfault_fd, mis)) { return -1; } -- 1.9.1
[Qemu-devel] [PATCH v10 06/10] migration: add postcopy blocktime ctx into MigrationIncomingState
This patch adds request to kernel space for UFFD_FEATURE_THREAD_ID, in case when this feature is provided by kernel. PostcopyBlocktimeContext is incapsulated inside postcopy-ram.c, due to it's postcopy only feature. Also it defines PostcopyBlocktimeContext's instance live time. Information from PostcopyBlocktimeContext instance will be provided much after postcopy migration end, instance of PostcopyBlocktimeContext will live till QEMU exit, but part of it (vcpu_addr, page_fault_vcpu_time) used only during calculation, will be released when postcopy ended or failed. To enable postcopy blocktime calculation on destination, need to request proper capabiltiy (Patch for documentation will be at the tail of the patch set). As an example following command enable that capability, assume QEMU was started with -chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock option to control it [root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \ {\"execute\": \"migrate-set-capabilities\" , \"arguments\": { \"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\": true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock Or just with HMP (qemu) migrate_set_capability postcopy-blocktime on Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/migration.h| 8 ++ migration/postcopy-ram.c | 65 2 files changed, 73 insertions(+) diff --git a/migration/migration.h b/migration/migration.h index 56bf33c..770466b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -21,6 +21,8 @@ #include "qemu/coroutine_int.h" #include "hw/qdev.h" +struct PostcopyBlocktimeContext; + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *from_src_file; @@ -58,6 +60,12 @@ struct MigrationIncomingState { /* The coroutine we should enter (back) after failover */ Coroutine *migration_incoming_co; QemuSemaphore colo_incoming_sem; + +/* + * PostcopyBlocktimeContext to keep information for postcopy + * live migration, to 
calculate vCPU block time + * */ +struct PostcopyBlocktimeContext *blocktime_ctx; }; MigrationIncomingState *migration_incoming_get_current(void); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index bec6c2c..cc78981 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -61,6 +61,58 @@ struct PostcopyDiscardState { #include #include +typedef struct PostcopyBlocktimeContext { +/* time when page fault initiated per vCPU */ +int64_t *page_fault_vcpu_time; +/* page address per vCPU */ +uint64_t *vcpu_addr; +int64_t total_blocktime; +/* blocktime per vCPU */ +int64_t *vcpu_blocktime; +/* point in time when last page fault was initiated */ +int64_t last_begin; +/* number of vCPU are suspended */ +int smp_cpus_down; + +/* + * Handler for exit event, necessary for + * releasing whole blocktime_ctx + */ +Notifier exit_notifier; +/* + * Handler for postcopy event, necessary for + * releasing unnecessary part of blocktime_ctx + */ +Notifier postcopy_notifier; +} PostcopyBlocktimeContext; + +static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx) +{ +g_free(ctx->page_fault_vcpu_time); +g_free(ctx->vcpu_addr); +g_free(ctx->vcpu_blocktime); +g_free(ctx); +} + +static void migration_exit_cb(Notifier *n, void *data) +{ +PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, + exit_notifier); +destroy_blocktime_context(ctx); +} + +static struct PostcopyBlocktimeContext *blocktime_context_new(void) +{ +PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); +ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus); +ctx->vcpu_addr = g_new0(uint64_t, smp_cpus); +ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus); + +ctx->exit_notifier.notify = migration_exit_cb; +qemu_add_exit_notifier(>exit_notifier); +add_migration_state_change_notifier(>postcopy_notifier); +return ctx; +} /** * receive_ufd_features: check userfault fd features, to request only supported @@ -153,6 +205,19 @@ static bool 
ufd_check_and_apply(int ufd, MigrationIncomingState *mis) } } +#ifdef UFFD_FEATURE_THREAD_ID +if (migrate_postcopy_blocktime() && mis && +UFFD_FEATURE_THREAD_ID & supported_features) { +/* kernel supports that feature */ +/* don't create blocktime_context if it exists */ +if (!mis->blocktime_ctx) { +mis->blocktime_ctx = blocktime_context_new(); +} + +asked_features |= UFFD_FEATURE_THREAD_ID; +} +#endif + /* * request features, even if asked_features is 0, due to * kernel expects UFFD_API before UFFDIO_REGISTER, per -- 1.9.1
[Qemu-devel] [PATCH v10 08/10] migration: postcopy_blocktime documentation
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- docs/devel/migration.txt | 10 ++ 1 file changed, 10 insertions(+) diff --git a/docs/devel/migration.txt b/docs/devel/migration.txt index 1b940a8..4b625ca 100644 --- a/docs/devel/migration.txt +++ b/docs/devel/migration.txt @@ -402,6 +402,16 @@ will now cause the transition from precopy to postcopy. It can be issued immediately after migration is started or any time later on. Issuing it after the end of a migration is harmless. +Blocktime is a postcopy live migration metric, intended to show +how long the vCPU was in state of interruptable sleep due to pagefault. +This value is calculated on destination side. +To enable postcopy blocktime calculation, enter following command on destination +monitor: + +migrate_set_capability postcopy-blocktime on + +Postcopy blocktime can be retrieved by query-migrate qmp command. + Note: During the postcopy phase, the bandwidth limits set using migrate_set_speed is ignored (to avoid delaying requested pages that the destination is waiting for). -- 1.9.1
[Qemu-devel] [PATCH v10 00/10] calculate blocktime for postcopy live migration
This is the 10th version. The rationale for that idea is the following: vCPU could suspend during postcopy live migration until the faulted page has been copied into the kernel. Downtime on the source side is a value - the time interval since the source turned vCPU off, till the destination started running vCPU. But that value was a proper value for precopy migration; it really shows the amount of time when vCPU is down. But not for postcopy migration, because several vCPU threads could suspend after vCPU was started. That is important to estimate packet drop for SDN software. (V9 -> V10) - rebase - patch "update kernel header for UFFD_FEATURE_*" has changed, and was generated by scripts/update-linux-headers.sh as David suggested. (V8 -> V9) - rebase - traces (V7 -> V8) - just one comma in "migration: fix hardcoded function name in error report" It was really missed, but fixed in a further patch. (V6 -> V7) - copied bitmap was placed into RAMBlock as other migration related bitmaps. - Ordering of mark_postcopy_blocktime_end call and ordering of checking copied bitmap were changed. - linewrap style defects - new patch "postcopy_place_page factoring out" - postcopy_ram_supported_by_host accepts MigrationIncomingState in qmp_migrate_set_capabilities - minor fixes of documentation. and huge description of get_postcopy_total_blocktime was moved. David's comment. (V5 -> V6) - blocktime was added into hmp command. Comment from David. - bitmap for copied pages was added as well as check in *_begin/_end functions. Patch uses just introduced RAMBLOCK_FOREACH. Comment from David. - description of receive_ufd_features/request_ufd_features. Comment from David. - commit message headers/@since references were modified. Comment from Eric. - also typos in documentation. Comment from Eric. - style and description of field in MigrationInfo. Comment from Eric. - ufd_check_and_apply (former ufd_version_check) is called twice, so my previous patch contained double allocation of blocktime context and as a result a memory leak. 
In this patch series it was fixed. (V4 -> V5) - fill_destination_postcopy_migration_info empty stub was missed for none linux build (V3 -> V4) - get rid of Downtime as a name for vCPU waiting time during postcopy migration - PostcopyBlocktimeContext renamed (it was just BlocktimeContext) - atomic operations are used for dealing with fields of PostcopyBlocktimeContext affected in both threads. - hardcoded function names in error_report were replaced to %s and __line__ - this patch set includes postcopy-downtime capability, but it used on destination, coupled with not possibility to return calculated downtime back to source to show it in query-migrate, it looks like a big trade off - UFFD_API have to be sent notwithstanding need or not to ask kernel for a feature, due to kernel expects it in any case (see patch comment) - postcopy_downtime included into query-migrate output - also this patch set includes trivial fix migration: fix hardcoded function name in error report maybe that is a candidate for qemu-trivial mailing list, but I already sent "migration: Fixed code style" and it was unclaimed. (V2 -> V3) - Downtime calculation approach was changed, thanks to Peter Xu - Due to previous point no more need to keep GTree as well as bitmap of cpus. So glib changes aren't included in this patch set, it could be resent in another patch set, if it will be a good reason for it. - No procfs traces in this patchset, if somebody wants it, you could get it from patchwork site to track down page fault initiators. 
- UFFD_FEATURE_THREAD_ID is requested only when the kernel supports it - It doesn't send back the downtime, just traces it This patch set is based on commit [PATCH v9 0/3] Add bitmap for received pages in postcopy migration Both patch sets were rebased on commit a9158a5cba955b79d580a252cc58ff44d154e370 Alexey Perevalov (10): userfault: update kernel header for UFFD_FEATURE_* migration: pass MigrationIncomingState* into migration check functions migration: fix hardcoded function name in error report migration: split ufd_version_check onto receive/request features part migration: introduce postcopy-blocktime capability migration: add postcopy blocktime ctx into MigrationIncomingState migration: calculate vCPU blocktime on dst side migration: postcopy_blocktime documentation migration: add blocktime calculation into postcopy-test migration: add postcopy total blocktime into query-migrate docs/devel/migration.txt | 10 ++ hmp.c | 15 ++ linux-headers/linux/userfaultfd.h | 16 +- migration/migration.c | 54 +- migration/migration.h | 13 ++ migration/postcopy-ram.c | 358 -- migration/postcopy-ram.h | 2 +- migration/savevm.c| 2 +-
[Qemu-devel] [PATCH v10 09/10] migration: add blocktime calculation into postcopy-test
This patch just requests blocktime calculation, but doesn't add any facility to check or show it. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- tests/postcopy-test.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c index 8142f2a..1db5359 100644 --- a/tests/postcopy-test.c +++ b/tests/postcopy-test.c @@ -425,6 +425,17 @@ static void test_migrate(void) g_assert(qdict_haskey(rsp, "return")); QDECREF(rsp); +#ifdef UFFD_FEATURE_THREAD_ID +global_qtest = to; +rsp = qmp("{ 'execute': 'migrate-set-capabilities'," + "'arguments': { " + "'capabilities': [ {" + "'capability': 'postcopy-blocktime'," + "'state': true } ] } }"); +g_assert(qdict_haskey(rsp, "return")); +QDECREF(rsp); +#endif + /* We want to pick a speed slow enough that the test completes * quickly, but that it doesn't complete precopy even on a slow * machine, so also set the downtime. @@ -441,7 +452,6 @@ static void test_migrate(void) g_assert(qdict_haskey(rsp, "return")); QDECREF(rsp); - /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); -- 1.9.1
Re: [Qemu-devel] [PATCH v9 0/8] calculate blocktime for postcopy live migration
On 09/18/2017 02:15 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: This is the 9th version. The rationale for that idea is the following: vCPU could suspend during postcopy live migration until the faulted page has been copied into the kernel. Downtime on the source side is a value - the time interval since the source turned vCPU off, till the destination started running vCPU. But that value was a proper value for precopy migration; it really shows the amount of time when vCPU is down. But not for postcopy migration, because several vCPU threads could suspend after vCPU was started. That is important to estimate packet drop for SDN software. Hi Alexey, I see that the UFFD_FEATURE_THREAD_ID has landed in kernel v4.14-rc1 over the weekend, so it's probably time to reheat this patchset. I think you should be able to generate a first patch by running scripts/update-linux-headers.sh Hi David, ok, I'll resend it tomorrow, I also added set capability postcopy-blocktime into tests/postcopy-test.c, but I don't check the result of the qmp there, I added it just to enable and test code path, is it ok for you? Dave (V8 -> V9) - rebase - traces (V7 -> V8) - just one comma in "migration: fix hardcoded function name in error report" It was really missed, but fixed in a further patch. (V6 -> V7) - copied bitmap was placed into RAMBlock as other migration related bitmaps. - Ordering of mark_postcopy_blocktime_end call and ordering of checking copied bitmap were changed. - linewrap style defects - new patch "postcopy_place_page factoring out" - postcopy_ram_supported_by_host accepts MigrationIncomingState in qmp_migrate_set_capabilities - minor fixes of documentation. and huge description of get_postcopy_total_blocktime was moved. David's comment. (V5 -> V6) - blocktime was added into hmp command. Comment from David. - bitmap for copied pages was added as well as check in *_begin/_end functions. Patch uses just introduced RAMBLOCK_FOREACH. Comment from David. 
- description of receive_ufd_features/request_ufd_features. Comment from David. - commit message headers/@since references were modified. Comment from Eric. - also typos in documentation. Comment from Eric. - style and description of field in MigrationInfo. Comment from Eric. - ufd_check_and_apply (former ufd_version_check) is calling twice, so my previous patch contained double allocation of blocktime context and as a result memory leak. In this patch series it was fixed. (V4 -> V5) - fill_destination_postcopy_migration_info empty stub was missed for none linux build (V3 -> V4) - get rid of Downtime as a name for vCPU waiting time during postcopy migration - PostcopyBlocktimeContext renamed (it was just BlocktimeContext) - atomic operations are used for dealing with fields of PostcopyBlocktimeContext affected in both threads. - hardcoded function names in error_report were replaced to %s and __line__ - this patch set includes postcopy-downtime capability, but it used on destination, coupled with not possibility to return calculated downtime back to source to show it in query-migrate, it looks like a big trade off - UFFD_API have to be sent notwithstanding need or not to ask kernel for a feature, due to kernel expects it in any case (see patch comment) - postcopy_downtime included into query-migrate output - also this patch set includes trivial fix migration: fix hardcoded function name in error report maybe that is a candidate for qemu-trivial mailing list, but I already sent "migration: Fixed code style" and it was unclaimed. (V2 -> V3) - Downtime calculation approach was changed, thanks to Peter Xu - Due to previous point no more need to keep GTree as well as bitmap of cpus. So glib changes aren't included in this patch set, it could be resent in another patch set, if it will be a good reason for it. - No procfs traces in this patchset, if somebody wants it, you could get it from patchwork site to track down page fault initiators. 
- UFFD_FEATURE_THREAD_ID is requesting only when kernel supports it - It doesn't send back the downtime, just trace it This patch set is based on commit [PATCH v3 0/3] Add bitmap for received pages in postcopy migration Alexey Perevalov (8): userfault: add pid into uffd_msg & update UFFD_FEATURE_* migration: pass MigrationIncomingState* into migration check functions migration: fix hardcoded function name in error report migration: split ufd_version_check onto receive/request features part migration: introduce postcopy-blocktime capability migration: add postcopy blocktime ctx into MigrationIncomingState migration: calculate vCPU blocktime on dst side migration: postcopy_blocktime documentation docs/devel/migration.txt | 10 ++ linux-headers/linux/userfaul
Re: [Qemu-devel] [RFC v2 00/32] postcopy+vhost-user/shared ram
Hello David, You wrote in previous version: We've had a postcopy migrate work now, with a few hacks we're still cleaning up, both on vhost-user-bridge and dpdk; so I'll get this updated and reposted. I want to know more about DPDK work, do you know, is somebody assigned to that task? On 08/24/2017 10:26 PM, Dr. David Alan Gilbert (git) wrote: From: "Dr. David Alan Gilbert" <dgilb...@redhat.com> Hi, This is a RFC/WIP series that enables postcopy migration with shared memory to a vhost-user process. It's based off current-head + Alexey's bitmap series It's tested with vhost-user-bridge and a dpdk (modified by Maxime that will get posted separately) - both very lightly. It's still got a few very rough edges, but it succesfully migrates with both normal and huge pages (2M). The major difference over v1 is that there's a set of code that merges vhost regions together on the qemu side so that we get a single hugepage region on the PC spanning the 640k hole (the hole hopefully isn't accessed by the client, but the client used to align around it anyway). It's also got a lot of cleanups from the comments from v1 but there's still a few things that need work. In particular, there's still the hack around qemu waiting for the set_mem_table to come back; I also worry what would happen if a set-mem-table was triggered during a migrate; I suspect it would break badly. One problem that didn't cause a problem was madvises for hugepages; because we register userfault directly after mmap'ing the region in the client, we have no pages mapped and hence the madvise's/fallocate's are fortunately not compulsary. Still, I'd like a way to do it, it would feel safer. A copy of this code, based off the current 2.10.0-rc4 together with Alexey's bitmap code is available here: https://github.com/dagrh/qemu/tree/vhost-wipv2 Dave Dr. 
David Alan Gilbert (32): vhu: vu_queue_started vhub: Only process received packets on started queues migrate: Update ram_block_discard_range for shared qemu_ram_block_host_offset migration/ram: ramblock_recv_bitmap_test_byte_offset postcopy: use UFFDIO_ZEROPAGE only when available postcopy: Add notifier chain postcopy: Add vhost-user flag for postcopy and check it vhost-user: Add 'VHOST_USER_POSTCOPY_ADVISE' message vhub: Support sending fds back to qemu vhub: Open userfaultfd postcopy: Allow registering of fd handler vhost+postcopy: Register shared ufd with postcopy vhost+postcopy: Transmit 'listen' to client vhost+postcopy: Register new regions with the ufd vhost+postcopy: Send address back to qemu vhost+postcopy: Stash RAMBlock and offset vhost+postcopy: Send requests to source for shared pages vhost+postcopy: Resolve client address postcopy: wake shared postcopy: postcopy_notify_shared_wake vhost+postcopy: Add vhost waker vhost+postcopy: Call wakeups vub+postcopy: madvises vhost+postcopy: Lock around set_mem_table vhost: Add VHOST_USER_POSTCOPY_END message vhost+postcopy: Wire up POSTCOPY_END notify postcopy: Allow shared memory vhost-user: Claim support for postcopy vhost: Merge neighbouring hugepage regions where appropriate vhost: Don't break merged regions on small remove/non-adds postcopy shared docs contrib/libvhost-user/libvhost-user.c | 226 - contrib/libvhost-user/libvhost-user.h | 22 ++- docs/devel/migration.txt | 39 docs/interop/vhost-user.txt | 39 exec.c| 60 -- hw/virtio/trace-events| 27 +++ hw/virtio/vhost-user.c| 326 +- hw/virtio/vhost.c | 121 +++- include/exec/cpu-common.h | 4 + migration/migration.c | 3 + migration/migration.h | 4 + migration/postcopy-ram.c | 359 +++--- migration/postcopy-ram.h | 69 +++ migration/ram.c | 5 + migration/ram.h | 1 + migration/savevm.c| 13 ++ migration/trace-events| 6 + tests/vhost-user-bridge.c | 1 + trace-events | 3 + vl.c | 2 + 20 files changed, 1241 insertions(+), 89 deletions(-) -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [RFC 23/29] vub+postcopy: madvises
On 08/08/2017 08:06 PM, Dr. David Alan Gilbert wrote: * Alexey Perevalov (a.pereva...@samsung.com) wrote: On 06/28/2017 10:00 PM, Dr. David Alan Gilbert (git) wrote: From: "Dr. David Alan Gilbert" <dgilb...@redhat.com> Clear the area and turn off THP. Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com> --- contrib/libvhost-user/libvhost-user.c | 32 ++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 0658b6e847..ceddeac74f 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -451,11 +451,39 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) } if (dev->postcopy_listening) { +int ret; /* We should already have an open ufd need to mark each memory * range as ufd. - * Note: Do we need any madvises? Well it's not been accessed - * yet, still probably need no THP to be safe, discard to be safe? */ + +/* Discard any mapping we have here; note I can't use MADV_REMOVE + * or fallocate to make the hole since I don't want to lose + * data that's already arrived in the shared process. + * TODO: How to do hugepage + */ Hi, David, frankly saying, I stuck with my solution, and I have also another issues, but here I could suggest solution for hugepages. I think we could transmit a received pages bitmap in VHOST_USER_SET_MEM_TABLE (VhostUserMemoryRegion), but it will raise a compatibility issue, or introduce special message type for that and send it before VHOST_USER_SET_MEM_TABLE. So it will be possible to do fallocate on received bitmap basis, just skip already copied pages. If you wish, I could send patches, rebased on yours, for doing it. What we found works is that actually we don't need to do a discard - since we've only just done the mmap of the arena, nothing will be occupying it on the shared client, so we don't need to discard. 
Looks like yes, I checked on kernel from Andrea's git, there is any more EEXIST error in case when client doesn't fallocate. We've had a postcopy migrate work now, with a few hacks we're still cleaning up, both on vhost-user-bridge and dpdk; so I'll get this updated and reposted. In you patch series vring is disabling in case of VHOST_USER_GET_VRING_BASE. It's being called when vhost-user server want's to stop vring. QEMU is enabling vring as soon as virtual machine is started, so I didn't see explicit vring disabling for migrating VRING. So migrating VRING is protected just by uffd_register, isn't it? And PMD thread (any vhost-user thread which accessing migrating VRING) will wait page copying in this case, right? Dave +ret = madvise((void *)dev_region->mmap_addr, + dev_region->size + dev_region->mmap_offset, + MADV_DONTNEED); +if (ret) { +fprintf(stderr, +"%s: Failed to madvise(DONTNEED) region %d: %s\n", +__func__, i, strerror(errno)); +} +/* Turn off transparent hugepages so we dont get lose wakeups + * in neighbouring pages. + * TODO: Turn this backon later. + */ +ret = madvise((void *)dev_region->mmap_addr, + dev_region->size + dev_region->mmap_offset, + MADV_NOHUGEPAGE); +if (ret) { +/* Note: This can happen legally on kernels that are configured + * without madvise'able hugepages + */ +fprintf(stderr, +"%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n", +__func__, i, strerror(errno)); +} struct uffdio_register reg_struct; /* Note: We might need to go back to using mmap_addr and * len + mmap_offset for * huge pages, but then we do hope not to -- Best regards, Alexey Perevalov -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [RFC 23/29] vub+postcopy: madvises
On 06/28/2017 10:00 PM, Dr. David Alan Gilbert (git) wrote: From: "Dr. David Alan Gilbert" <dgilb...@redhat.com> Clear the area and turn off THP. Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com> --- contrib/libvhost-user/libvhost-user.c | 32 ++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 0658b6e847..ceddeac74f 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -451,11 +451,39 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) } if (dev->postcopy_listening) { +int ret; /* We should already have an open ufd need to mark each memory * range as ufd. - * Note: Do we need any madvises? Well it's not been accessed - * yet, still probably need no THP to be safe, discard to be safe? */ + +/* Discard any mapping we have here; note I can't use MADV_REMOVE + * or fallocate to make the hole since I don't want to lose + * data that's already arrived in the shared process. + * TODO: How to do hugepage + */ Hi, David, frankly saying, I stuck with my solution, and I have also another issues, but here I could suggest solution for hugepages. I think we could transmit a received pages bitmap in VHOST_USER_SET_MEM_TABLE (VhostUserMemoryRegion), but it will raise a compatibility issue, or introduce special message type for that and send it before VHOST_USER_SET_MEM_TABLE. So it will be possible to do fallocate on received bitmap basis, just skip already copied pages. If you wish, I could send patches, rebased on yours, for doing it. +ret = madvise((void *)dev_region->mmap_addr, + dev_region->size + dev_region->mmap_offset, + MADV_DONTNEED); +if (ret) { +fprintf(stderr, +"%s: Failed to madvise(DONTNEED) region %d: %s\n", +__func__, i, strerror(errno)); +} +/* Turn off transparent hugepages so we dont get lose wakeups + * in neighbouring pages. + * TODO: Turn this backon later. 
+ */ +ret = madvise((void *)dev_region->mmap_addr, + dev_region->size + dev_region->mmap_offset, + MADV_NOHUGEPAGE); +if (ret) { +/* Note: This can happen legally on kernels that are configured + * without madvise'able hugepages + */ +fprintf(stderr, +"%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n", +__func__, i, strerror(errno)); +} struct uffdio_register reg_struct; /* Note: We might need to go back to using mmap_addr and * len + mmap_offset for * huge pages, but then we do hope not to -- Best regards, Alexey Perevalov
[Qemu-devel] [PATCH v9 3/3] migration: add bitmap for received page
This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, and for recovery after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- include/exec/ram_addr.h | 10 ++ migration/postcopy-ram.c | 17 - migration/ram.c | 45 + migration/ram.h | 6 ++ 4 files changed, 73 insertions(+), 5 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index c04f4f6..bb902bb 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -47,6 +47,8 @@ struct RAMBlock { * of the postcopy phase */ unsigned long *unsentmap; +/* bitmap of already received pages in postcopy */ +unsigned long *receivedmap; }; static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) @@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) return (char *)block->host + offset; } +static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, +RAMBlock *rb) +{ +uint64_t host_addr_offset = +(uint64_t)(uintptr_t)(host_addr - (void *)rb->host); +return host_addr_offset >> TARGET_PAGE_BITS; +} + long qemu_getrampagesize(void); unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index be497bb..7a414eb 100644 --- 
a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -560,22 +560,28 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set_range(rb, host_addr, + pagesize / qemu_target_page_size()); } +return ret; } /* @@ -592,7 +598,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -614,7 +620,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(), +rb)) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); diff --git a/migration/ram.c b/migration/ram.c index 9cc1b17..d14b8bb 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -44,6 +44,7 @@ #include "qemu/error-report.h" #include "trace.h" #include "exec/ram_addr.h" +#include "exec/target_page.h" #include "qemu/rcu_queue.h" #include "migration/colo.h" @@ -147,6 +148,40 @@ out: return ret; } +sta
[Qemu-devel] [PATCH v9 0/3] Add bitmap for received pages in postcopy migration
This is the 9th version of [PATCH v1 0/2] Add bitmap for copied pages in postcopy migration cover message from there This is a separate patch set, it is derived from https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html There are several possible use cases: 1. solve issue with postcopy live migration and shared memory. OVS-VSWITCH requires information about copied pages, to fallocate newly allocated pages. 2. calculating vCPU blocktime for more details see https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html 3. Recovery after a failure in the middle of postcopy migration V8 -> V9 - patch: "migration: fix incorrect postcopy recved_bitmap" from "[RFC 00/29] Migration: postcopy failure recovery" patch set was squashed into the latest patch of this patchset, getpagesize was replaced to qemu_target_page_size, as David suggested. - for the sake of API uniformity semantics of all functions were changed, now RAMBlock *rb is the first argument, as well as in bitmap API. - Also define TARGET_PAGE_BITS was replaced to qemu_target_page_bits in all other places of this patchset, for uniformity and maintenance. V7 -> V8 - removed unnecessary renaming and moving of block variable to ram_load's function scope - ramblock_recv_map_init became static function V6 -> V7 - rebased on [PATCH v7 0/5] Create setup/cleanup methods for migration incoming side - live time of the received map was changed (ram_load_setup/ram_load_cleanup) V5 -> V6 - call ramblock_recv_map_init from migration_fd_process_incoming (Peter suggested). But finalization is still in ram_load_cleanup as Juan suggested.
V4 -> V5 - remove ramblock_recv_bitmap_clear_range in favor to bitmap_clear (comment from David) - single invocation place for ramblock_recv_bitmap_set (comment from Peter) - minor changes like removing comment from qemu_ufd_copy_ioctl and local variable from ramblock_recv_map_init (comment from Peter) V3 -> V4 - clear_bit instead of ramblock_recv_bitmap_clear in ramblock_recv_bitmap_clear_range, it reduced number of operation (comment from Juan) - for postcopy ramblock_recv_bitmap_set is calling after page was copied, only in case of success (comment from David) - indentation fixes (comment from Juan) V2 -> V3 - ramblock_recv_map_init call is placed into migration_incoming_get_current, looks like it's general place for both precopy and postcopy case. - received bitmap memory releasing is placed into ram_load_cleanup, unfortunatelly, it calls only in case of precopy. - precopy case and discard ram block case - function renaming, and another minor cleanups V1 -> V2 - change in terminology s/copied/received/g - granularity became TARGET_PAGE_SIZE, but not actual page size of the ramblock - movecopiedmap & get_copiedmap_size were removed, until patch set where it will be necessary - releasing memory of receivedmap was added into ram_load_cleanup - new patch "migration: introduce qemu_ufd_copy_ioctl helper" Patchset is based on: commit 6d60e295ef020759a03b90724d0342012c189ba2 "Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging" Alexey Perevalov (3): migration: postcopy_place_page factoring out migration: introduce qemu_ufd_copy_ioctl helper migration: add bitmap for received page include/exec/ram_addr.h | 10 + migration/postcopy-ram.c | 54 +++- migration/postcopy-ram.h | 4 ++-- migration/ram.c | 49 +-- migration/ram.h | 6 ++ 5 files changed, 100 insertions(+), 23 deletions(-) -- 1.9.1
[Qemu-devel] [PATCH v9 1/3] migration: postcopy_place_page factoring out
Need to mark copied pages as close as possible to the place where it tracks down. That will be necessary in a further patch. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Peter Xu <pet...@redhat.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 13 +++-- migration/postcopy-ram.h | 4 ++-- migration/ram.c | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7e21e6f..996e64d 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -564,9 +564,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize) +RAMBlock *rb) { struct uffdio_copy copy_struct; +size_t pagesize = qemu_ram_pagesize(rb); copy_struct.dst = (uint64_t)(uintptr_t)host; copy_struct.src = (uint64_t)(uintptr_t)from; @@ -595,11 +596,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize) + RAMBlock *rb) { trace_postcopy_place_page_zero(host); -if (pagesize == getpagesize()) { +if (qemu_ram_pagesize(rb) == getpagesize()) { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host; zero_struct.range.len = getpagesize(); @@ -629,7 +630,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); } return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, - pagesize); + rb); } return 0; @@ -692,14 +693,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize) +RAMBlock *rb) { assert(0); return -1; } int 
postcopy_place_page_zero(MigrationIncomingState *mis, void *host, -size_t pagesize) +RAMBlock *rb) { assert(0); return -1; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 52d51e8..78a3591 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms, * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize); +RAMBlock *rb); /* * Place a zero page at (host) atomically * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize); + RAMBlock *rb); /* The current postcopy state is read/set by postcopy_state_get/set * which update it atomically. diff --git a/migration/ram.c b/migration/ram.c index 1b08296..9cc1b17 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2470,10 +2470,10 @@ static int ram_load_postcopy(QEMUFile *f) if (all_zero) { ret = postcopy_place_page_zero(mis, place_dest, - block->page_size); + block); } else { ret = postcopy_place_page(mis, place_dest, - place_source, block->page_size); + place_source, block); } } if (!ret) { -- 1.9.1
[Qemu-devel] [PATCH v9 2/3] migration: introduce qemu_ufd_copy_ioctl helper
Just for placing auxilary operations inside helper, auxilary operations like: track received pages, notify about copying operation in futher patches. Reviewed-by: Juan Quintela <quint...@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 34 +- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 996e64d..be497bb 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -559,6 +559,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return 0; } +static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, +void *from_addr, uint64_t pagesize) +{ +if (from_addr) { +struct uffdio_copy copy_struct; +copy_struct.dst = (uint64_t)(uintptr_t)host_addr; +copy_struct.src = (uint64_t)(uintptr_t)from_addr; +copy_struct.len = pagesize; +copy_struct.mode = 0; +return ioctl(userfault_fd, UFFDIO_COPY, _struct); +} else { +struct uffdio_zeropage zero_struct; +zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; +zero_struct.range.len = pagesize; +zero_struct.mode = 0; +return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +} + /* * Place a host page (from) at (host) atomically * returns 0 on success @@ -566,20 +585,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, RAMBlock *rb) { -struct uffdio_copy copy_struct; size_t pagesize = qemu_ram_pagesize(rb); -copy_struct.dst = (uint64_t)(uintptr_t)host; -copy_struct.src = (uint64_t)(uintptr_t)from; -copy_struct.len = pagesize; -copy_struct.mode = 0; - /* copy also acks to the kernel waking the stalled thread up * TODO: We can inhibit that ack and only do it if it was requested * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (ioctl(mis->userfault_fd, UFFDIO_COPY, _struct)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -601,12 +614,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -struct uffdio_zeropage zero_struct; -zero_struct.range.start = (uint64_t)(uintptr_t)host; -zero_struct.range.len = getpagesize(); -zero_struct.mode = 0; - -if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, _struct)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); -- 1.9.1
Re: [Qemu-devel] [RFC 01/29] migration: fix incorrect postcopy recved_bitmap
On 08/01/2017 09:02 AM, Peter Xu wrote: On Tue, Aug 01, 2017 at 08:48:18AM +0300, Alexey Perevalov wrote: On 08/01/2017 05:11 AM, Peter Xu wrote: On Mon, Jul 31, 2017 at 05:34:14PM +0100, Dr. David Alan Gilbert wrote: * Peter Xu (pet...@redhat.com) wrote: The bitmap setup during postcopy is incorrectly when the pgaes are huge pages. Fix it. Signed-off-by: Peter Xu <pet...@redhat.com> --- migration/postcopy-ram.c | 2 +- migration/ram.c | 8 migration/ram.h | 2 ++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 276ce12..952b73a 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -578,7 +578,7 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); } if (!ret) { -ramblock_recv_bitmap_set(host_addr, rb); +ramblock_recv_bitmap_set_range(rb, host_addr, pagesize / getpagesize()); isn't that pagesize / qemu_target_page_size() ? Other than that it looks OK. Yes, I should have fixed this before. I guess Alexey will handle this change (along with the copied bitmap series)? Anyway, I'll fix it as well in my series, until Alexey post the new version that I can rebase to. Thanks, I'll squash it, and I'll resend it today. Are you agree to add Signed-off-by: Peter Xu <pet...@redhat.com> to my patch? Firstly, if you are squashing the patch, fixing the issue that Dave has pointed out, please feel free to add my R-b on the patch. Of course I'll take into account David's suggestion. I don't know whether it would be suitable to add my S-o-b here - since most of the patch content is written by you, not me. But I'm totally fine if you want to include that (btw, thanks for the offer :). So either one R-b or S-o-b is okay to me. Thanks, -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [RFC 01/29] migration: fix incorrect postcopy recved_bitmap
On 08/01/2017 05:11 AM, Peter Xu wrote: On Mon, Jul 31, 2017 at 05:34:14PM +0100, Dr. David Alan Gilbert wrote: * Peter Xu (pet...@redhat.com) wrote: The bitmap setup during postcopy is incorrectly when the pgaes are huge pages. Fix it. Signed-off-by: Peter Xu <pet...@redhat.com> --- migration/postcopy-ram.c | 2 +- migration/ram.c | 8 migration/ram.h | 2 ++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 276ce12..952b73a 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -578,7 +578,7 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); } if (!ret) { -ramblock_recv_bitmap_set(host_addr, rb); +ramblock_recv_bitmap_set_range(rb, host_addr, pagesize / getpagesize()); isn't that pagesize / qemu_target_page_size() ? Other than that it looks OK. Yes, I should have fixed this before. I guess Alexey will handle this change (along with the copied bitmap series)? Anyway, I'll fix it as well in my series, until Alexey post the new version that I can rebase to. Thanks, I'll squash it, and I'll resend it today. Are you agree to add Signed-off-by: Peter Xu <pet...@redhat.com> to my patch? -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page
On 07/28/2017 10:06 AM, Alexey Perevalov wrote: On 07/28/2017 09:57 AM, Peter Xu wrote: On Fri, Jul 28, 2017 at 09:43:28AM +0300, Alexey Perevalov wrote: On 07/28/2017 07:27 AM, Peter Xu wrote: On Thu, Jul 27, 2017 at 10:27:41AM +0300, Alexey Perevalov wrote: On 07/27/2017 05:35 AM, Peter Xu wrote: On Wed, Jul 26, 2017 at 06:24:11PM +0300, Alexey Perevalov wrote: On 07/26/2017 11:43 AM, Peter Xu wrote: On Wed, Jul 26, 2017 at 11:07:17AM +0300, Alexey Perevalov wrote: On 07/26/2017 04:49 AM, Peter Xu wrote: On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote: This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, maybe for restore after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- [...] 
static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set(host_addr, rb); Wait... Now we are using 4k-page/bit bitmap, do we need to take care of the huge pages here? Looks like we are only setting the first bit of it if it is a huge page? First version was per ramblock page size, IOW bitmap was smaller in case of hugepages. Yes, but this is not the first version any more. :) This patch is using: bitmap_new(rb->max_length >> TARGET_PAGE_BITS); to allocate bitmap, so it is using small pages always for bitmap, right? (I should not really say "4k" pages, here I think the size is host page size, which is the thing returned from getpagesize()). You mentioned that TARGET_PAGE_SIZE is reasonable for precopy case, in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page" I though TARGET_PAGE_SIZE as transmition unit, is using in precopy even hugepage case. But it's not so logically, page being marked as dirty, should be sent as a whole page. Sorry if I misunderstood, but I didn't see anything wrong - we are sending pages in small pages, but when postcopy is there, we do UFFDIO_COPY in huge page, so everything is fine? I think yes, we chose TARGET_PAGE_SIZE because of wider use case ranges. So... are you going to post another version? 
IIUC we just need to use a bitmap_set() to replace the ramblock_recv_bitmap_set(), while set the size with "pagesize / TARGET_PAGE_SIZE"? From my point of view TARGET_PAGE_SIZE/TARGET_PAGE_BITS it's a platform specific and it used in ram_load to copy to buffer so it's more preferred for bitmap size and I'm not going to replace ramblock_recv_bitmap_set helper - it calculates offset. (I think I was wrong when saying getpagesize() above: the small page should be target page size, while the huge page should be the host's) I think we should forget about huge page case in "received bitmap" concept, maybe in "uffd_copied bitmap" it was reasonable ;) Again, I am not sure I got the whole idea of the reply... However, I do think when we UFFDIO_COPY a huge page, then we should do bitmap_set() on the received bitmap for the whole range that the huge page covers. for what purpose? We chose to use small-paged bitmap since in precopy we need to have such a granularity (in precopy, we can copy a small page even that small page is on a host huge page). Since we decided to use the small-paged bitmap, we need to make sure it follows how it was defined: one bit defines whether the corresponding small page is received. IMHO not following that is hacky and error-prone. IMHO, the
Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page
On 07/28/2017 09:57 AM, Peter Xu wrote: On Fri, Jul 28, 2017 at 09:43:28AM +0300, Alexey Perevalov wrote: On 07/28/2017 07:27 AM, Peter Xu wrote: On Thu, Jul 27, 2017 at 10:27:41AM +0300, Alexey Perevalov wrote: On 07/27/2017 05:35 AM, Peter Xu wrote: On Wed, Jul 26, 2017 at 06:24:11PM +0300, Alexey Perevalov wrote: On 07/26/2017 11:43 AM, Peter Xu wrote: On Wed, Jul 26, 2017 at 11:07:17AM +0300, Alexey Perevalov wrote: On 07/26/2017 04:49 AM, Peter Xu wrote: On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote: This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, maybe for restore after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- [...] 
static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set(host_addr, rb); Wait... Now we are using 4k-page/bit bitmap, do we need to take care of the huge pages here? Looks like we are only setting the first bit of it if it is a huge page? First version was per ramblock page size, IOW bitmap was smaller in case of hugepages. Yes, but this is not the first version any more. :) This patch is using: bitmap_new(rb->max_length >> TARGET_PAGE_BITS); to allocate bitmap, so it is using small pages always for bitmap, right? (I should not really say "4k" pages, here I think the size is host page size, which is the thing returned from getpagesize()). You mentioned that TARGET_PAGE_SIZE is reasonable for precopy case, in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page" I though TARGET_PAGE_SIZE as transmition unit, is using in precopy even hugepage case. But it's not so logically, page being marked as dirty, should be sent as a whole page. Sorry if I misunderstood, but I didn't see anything wrong - we are sending pages in small pages, but when postcopy is there, we do UFFDIO_COPY in huge page, so everything is fine? I think yes, we chose TARGET_PAGE_SIZE because of wider use case ranges. So... are you going to post another version? 
IIUC we just need to use a bitmap_set() to replace the ramblock_recv_bitmap_set(), while set the size with "pagesize / TARGET_PAGE_SIZE"? From my point of view TARGET_PAGE_SIZE/TARGET_PAGE_BITS it's a platform specific and it used in ram_load to copy to buffer so it's more preferred for bitmap size and I'm not going to replace ramblock_recv_bitmap_set helper - it calculates offset. (I think I was wrong when saying getpagesize() above: the small page should be target page size, while the huge page should be the host's) I think we should forget about huge page case in "received bitmap" concept, maybe in "uffd_copied bitmap" it was reasonable ;) Again, I am not sure I got the whole idea of the reply... However, I do think when we UFFDIO_COPY a huge page, then we should do bitmap_set() on the received bitmap for the whole range that the huge page covers. for what purpose? We chose to use small-paged bitmap since in precopy we need to have such a granularity (in precopy, we can copy a small page even that small page is on a host huge page). Since we decided to use the small-paged bitmap, we need to make sure it follows how it was defined: one bit defines whether the corresponding small page is received. IMHO not following that is hacky and error-prone. IMHO, the bitmap is defined as "one bit per small page", and the small page size is T
Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page
On 07/28/2017 07:27 AM, Peter Xu wrote: On Thu, Jul 27, 2017 at 10:27:41AM +0300, Alexey Perevalov wrote: On 07/27/2017 05:35 AM, Peter Xu wrote: On Wed, Jul 26, 2017 at 06:24:11PM +0300, Alexey Perevalov wrote: On 07/26/2017 11:43 AM, Peter Xu wrote: On Wed, Jul 26, 2017 at 11:07:17AM +0300, Alexey Perevalov wrote: On 07/26/2017 04:49 AM, Peter Xu wrote: On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote: This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, maybe for restore after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- [...] 
static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set(host_addr, rb); Wait... Now we are using 4k-page/bit bitmap, do we need to take care of the huge pages here? Looks like we are only setting the first bit of it if it is a huge page? First version was per ramblock page size, IOW bitmap was smaller in case of hugepages. Yes, but this is not the first version any more. :) This patch is using: bitmap_new(rb->max_length >> TARGET_PAGE_BITS); to allocate bitmap, so it is using small pages always for bitmap, right? (I should not really say "4k" pages, here I think the size is host page size, which is the thing returned from getpagesize()). You mentioned that TARGET_PAGE_SIZE is reasonable for precopy case, in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page" I though TARGET_PAGE_SIZE as transmition unit, is using in precopy even hugepage case. But it's not so logically, page being marked as dirty, should be sent as a whole page. Sorry if I misunderstood, but I didn't see anything wrong - we are sending pages in small pages, but when postcopy is there, we do UFFDIO_COPY in huge page, so everything is fine? I think yes, we chose TARGET_PAGE_SIZE because of wider use case ranges. So... are you going to post another version? 
IIUC we just need to use a bitmap_set() to replace the ramblock_recv_bitmap_set(), while set the size with "pagesize / TARGET_PAGE_SIZE"? From my point of view TARGET_PAGE_SIZE/TARGET_PAGE_BITS it's a platform specific and it used in ram_load to copy to buffer so it's more preferred for bitmap size and I'm not going to replace ramblock_recv_bitmap_set helper - it calculates offset. (I think I was wrong when saying getpagesize() above: the small page should be target page size, while the huge page should be the host's) I think we should forget about huge page case in "received bitmap" concept, maybe in "uffd_copied bitmap" it was reasonable ;) Again, I am not sure I got the whole idea of the reply... However, I do think when we UFFDIO_COPY a huge page, then we should do bitmap_set() on the received bitmap for the whole range that the huge page covers. for what purpose? IMHO, the bitmap is defined as "one bit per small page", and the small page size is TARGET_PAGE_SIZE. We cannot just assume that "as long as the first bit of the huge page is set, all the small pages in the huge page are set". At the moment of copying all small pages of the huge page, should be received. Yes it's assumption, but I couldn't predict side effect, maybe it will be necessary in postcopy failure handling, while copying pages back, but I'm not sure right now. To know that, need to start implementing it, or at least to deep investigation. Thanks, -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page
On 07/27/2017 05:35 AM, Peter Xu wrote: On Wed, Jul 26, 2017 at 06:24:11PM +0300, Alexey Perevalov wrote: On 07/26/2017 11:43 AM, Peter Xu wrote: On Wed, Jul 26, 2017 at 11:07:17AM +0300, Alexey Perevalov wrote: On 07/26/2017 04:49 AM, Peter Xu wrote: On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote: This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, maybe for restore after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- [...] static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set(host_addr, rb); Wait... 
Now we are using 4k-page/bit bitmap, do we need to take care of the huge pages here? Looks like we are only setting the first bit of it if it is a huge page? First version was per ramblock page size, IOW bitmap was smaller in case of hugepages. Yes, but this is not the first version any more. :) This patch is using: bitmap_new(rb->max_length >> TARGET_PAGE_BITS); to allocate bitmap, so it is using small pages always for bitmap, right? (I should not really say "4k" pages, here I think the size is host page size, which is the thing returned from getpagesize()). You mentioned that TARGET_PAGE_SIZE is reasonable for precopy case, in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page" I though TARGET_PAGE_SIZE as transmition unit, is using in precopy even hugepage case. But it's not so logically, page being marked as dirty, should be sent as a whole page. Sorry if I misunderstood, but I didn't see anything wrong - we are sending pages in small pages, but when postcopy is there, we do UFFDIO_COPY in huge page, so everything is fine? I think yes, we chose TARGET_PAGE_SIZE because of wider use case ranges. So... are you going to post another version? IIUC we just need to use a bitmap_set() to replace the ramblock_recv_bitmap_set(), while set the size with "pagesize / TARGET_PAGE_SIZE"? From my point of view TARGET_PAGE_SIZE/TARGET_PAGE_BITS it's a platform specific and it used in ram_load to copy to buffer so it's more preferred for bitmap size and I'm not going to replace ramblock_recv_bitmap_set helper - it calculates offset. (I think I was wrong when saying getpagesize() above: the small page should be target page size, while the huge page should be the host's) I think we should forget about huge page case in "received bitmap" concept, maybe in "uffd_copied bitmap" it was reasonable ;) -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page
On 07/26/2017 11:43 AM, Peter Xu wrote: On Wed, Jul 26, 2017 at 11:07:17AM +0300, Alexey Perevalov wrote: On 07/26/2017 04:49 AM, Peter Xu wrote: On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote: This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, maybe for restore after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- [...] static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set(host_addr, rb); Wait... Now we are using 4k-page/bit bitmap, do we need to take care of the huge pages here? Looks like we are only setting the first bit of it if it is a huge page? 
First version was per ramblock page size, IOW bitmap was smaller in case of hugepages. Yes, but this is not the first version any more. :) This patch is using: bitmap_new(rb->max_length >> TARGET_PAGE_BITS); to allocate bitmap, so it is using small pages always for bitmap, right? (I should not really say "4k" pages, here I think the size is host page size, which is the thing returned from getpagesize()). You mentioned that TARGET_PAGE_SIZE is reasonable for precopy case, in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page" I though TARGET_PAGE_SIZE as transmition unit, is using in precopy even hugepage case. But it's not so logically, page being marked as dirty, should be sent as a whole page. Sorry if I misunderstood, but I didn't see anything wrong - we are sending pages in small pages, but when postcopy is there, we do UFFDIO_COPY in huge page, so everything is fine? I think yes, we chose TARGET_PAGE_SIZE because of wider use case ranges. -- Best regards, Alexey Perevalov
Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page
On 07/26/2017 04:49 AM, Peter Xu wrote: On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote: This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, maybe for restore after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- [...] static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set(host_addr, rb); Wait... Now we are using 4k-page/bit bitmap, do we need to take care of the huge pages here? Looks like we are only setting the first bit of it if it is a huge page? First version was per ramblock page size, IOW bitmap was smaller in case of hugepages. 
You mentioned that TARGET_PAGE_SIZE is reasonable for the precopy case; in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page" I thought of TARGET_PAGE_SIZE as the transmission unit, which is used in precopy even in the hugepage case. But that is not quite logical: a page being marked as dirty should be sent as a whole page. -- Best regards, Alexey Perevalov
[Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page
This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, maybe for restore after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- include/exec/ram_addr.h | 10 ++ migration/postcopy-ram.c | 16 +++- migration/ram.c | 36 migration/ram.h | 5 + 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index c04f4f6..bb902bb 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -47,6 +47,8 @@ struct RAMBlock { * of the postcopy phase */ unsigned long *unsentmap; +/* bitmap of already received pages in postcopy */ +unsigned long *receivedmap; }; static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) @@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) return (char *)block->host + offset; } +static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, +RAMBlock *rb) +{ +uint64_t host_addr_offset = +(uint64_t)(uintptr_t)(host_addr - (void *)rb->host); +return host_addr_offset >> TARGET_PAGE_BITS; +} + long qemu_getrampagesize(void); unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index be497bb..276ce12 100644 --- a/migration/postcopy-ram.c +++ 
b/migration/postcopy-ram.c @@ -560,22 +560,27 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set(host_addr, rb); } +return ret; } /* @@ -592,7 +597,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -614,7 +619,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(), +rb)) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); diff --git a/migration/ram.c b/migration/ram.c index 9cc1b17..107ee9d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -147,6 +147,32 @@ out: return ret; } +static void ramblock_recv_map_init(void) +{ +RAMBlock *rb; + +RAMBLOCK_FOREACH(rb) { +assert(!rb->receivedmap); +rb->receivedmap = bitmap_new(rb->max_length >> TARGET_PAGE_BITS); +} +} + +int ramblock_recv_bitmap_test(void *host_addr, RAMBlock *rb) +{ +return test_bit(ramblock_recv_bitmap_offset(host_addr, rb), +rb->recei
[Qemu-devel] [PATCH v8 0/3] Add bitmap for received pages in postcopy migration
This is 8th version of [PATCH v1 0/2] Add bitmap for copied pages in postcopy migration cover message from there This is a separate patch set, it derived from https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html There are several possible use cases: 1. solve issue with postcopy live migration and shared memory. OVS-VSWITCH requires information about copied pages, to fallocate newly allocated pages. 2. calculation vCPU blocktime for more details see https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html 3. Recovery after fail in the middle of postcopy migration V7 -> V8 - removed unnecessary renaming and moving of block variable to ram_load's function scope - ramblock_recv_map_init became static function V6 -> V7 - rebased on [PATCH v7 0/5] Create setup/cleanup methods for migration incoming side - live time of the received map was changed (ram_load_setup/ram_load_cleanup) V5 -> V6 - call ramblock_recv_map_init from migration_fd_process_incoming (Peter suggested)But finalization is still in ram_load_cleanup as Juan suggested. V4 -> V5 - remove ramblock_recv_bitmap_clear_range in favor to bitmap_clear (comment from David) - single invocation place for ramblock_recv_bitmap_set (comment from Peter) - minor changes like removing comment from qemu_ufd_copy_ioctl and local variable from ramblock_recv_map_init (comment from Peter) V3 -> V4 - clear_bit instead of ramblock_recv_bitmap_clear in ramblock_recv_bitmap_clear_range, it reduced number of operation (comment from Juan) - for postcopy ramblock_recv_bitmap_set is calling after page was copied, only in case of success (comment from David) - indentation fixes (comment from Juan) V2 -> V3 - ramblock_recv_map_init call is placed into migration_incoming_get_current, looks like it's general place for both precopy and postcopy case. - received bitmap memory releasing is placed into ram_load_cleanup, unfortunatelly, it calls only in case of precopy. 
- precopy case and discard ram block case - function renaming, and another minor cleanups V1 -> V2 - change in terminology s/copied/received/g - granularity became TARGET_PAGE_SIZE, but not actual page size of the ramblock - movecopiedmap & get_copiedmap_size were removed, until patch set where it will be necessary - releasing memory of receivedmap was added into ram_load_cleanup - new patch "migration: introduce qemu_ufd_copy_ioctl helper" Patchset is based on: commit 6d60e295ef020759a03b90724d0342012c189ba2 "Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging" Alexey Perevalov (3): migration: postcopy_place_page factoring out migration: introduce qemu_ufd_copy_ioctl helper migration: add bitmap for received page include/exec/ram_addr.h | 10 + migration/postcopy-ram.c | 53 +++- migration/postcopy-ram.h | 4 ++-- migration/ram.c | 40 ++-- migration/ram.h | 5 + 5 files changed, 89 insertions(+), 23 deletions(-) -- 1.9.1
[Qemu-devel] [PATCH v8 2/3] migration: introduce qemu_ufd_copy_ioctl helper
Just for placing auxilary operations inside helper, auxilary operations like: track received pages, notify about copying operation in futher patches. Reviewed-by: Juan Quintela <quint...@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 34 +- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 996e64d..be497bb 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -559,6 +559,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return 0; } +static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, +void *from_addr, uint64_t pagesize) +{ +if (from_addr) { +struct uffdio_copy copy_struct; +copy_struct.dst = (uint64_t)(uintptr_t)host_addr; +copy_struct.src = (uint64_t)(uintptr_t)from_addr; +copy_struct.len = pagesize; +copy_struct.mode = 0; +return ioctl(userfault_fd, UFFDIO_COPY, _struct); +} else { +struct uffdio_zeropage zero_struct; +zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; +zero_struct.range.len = pagesize; +zero_struct.mode = 0; +return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +} + /* * Place a host page (from) at (host) atomically * returns 0 on success @@ -566,20 +585,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, RAMBlock *rb) { -struct uffdio_copy copy_struct; size_t pagesize = qemu_ram_pagesize(rb); -copy_struct.dst = (uint64_t)(uintptr_t)host; -copy_struct.src = (uint64_t)(uintptr_t)from; -copy_struct.len = pagesize; -copy_struct.mode = 0; - /* copy also acks to the kernel waking the stalled thread up * TODO: We can inhibit that ack and only do it if it was requested * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (ioctl(mis->userfault_fd, UFFDIO_COPY, _struct)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -601,12 +614,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -struct uffdio_zeropage zero_struct; -zero_struct.range.start = (uint64_t)(uintptr_t)host; -zero_struct.range.len = getpagesize(); -zero_struct.mode = 0; - -if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, _struct)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); -- 1.9.1
[Qemu-devel] [PATCH v8 1/3] migration: postcopy_place_page factoring out
Need to mark copied pages as closer as possible to the place where it tracks down. That will be necessary in futher patch. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Peter Xu <pet...@redhat.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 13 +++-- migration/postcopy-ram.h | 4 ++-- migration/ram.c | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7e21e6f..996e64d 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -564,9 +564,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize) +RAMBlock *rb) { struct uffdio_copy copy_struct; +size_t pagesize = qemu_ram_pagesize(rb); copy_struct.dst = (uint64_t)(uintptr_t)host; copy_struct.src = (uint64_t)(uintptr_t)from; @@ -595,11 +596,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize) + RAMBlock *rb) { trace_postcopy_place_page_zero(host); -if (pagesize == getpagesize()) { +if (qemu_ram_pagesize(rb) == getpagesize()) { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host; zero_struct.range.len = getpagesize(); @@ -629,7 +630,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); } return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, - pagesize); + rb); } return 0; @@ -692,14 +693,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize) +RAMBlock *rb) { assert(0); return -1; } int 
postcopy_place_page_zero(MigrationIncomingState *mis, void *host, -size_t pagesize) +RAMBlock *rb) { assert(0); return -1; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 52d51e8..78a3591 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms, * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize); +RAMBlock *rb); /* * Place a zero page at (host) atomically * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize); + RAMBlock *rb); /* The current postcopy state is read/set by postcopy_state_get/set * which update it atomically. diff --git a/migration/ram.c b/migration/ram.c index 1b08296..9cc1b17 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2470,10 +2470,10 @@ static int ram_load_postcopy(QEMUFile *f) if (all_zero) { ret = postcopy_place_page_zero(mis, place_dest, - block->page_size); + block); } else { ret = postcopy_place_page(mis, place_dest, - place_source, block->page_size); + place_source, block); } } if (!ret) { -- 1.9.1
[Qemu-devel] [PATCH v7 3/3] migration: add bitmap for received page
This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, maybe for restore after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- include/exec/ram_addr.h | 10 ++ migration/postcopy-ram.c | 16 +++- migration/ram.c | 43 --- migration/ram.h | 6 ++ 4 files changed, 67 insertions(+), 8 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 73d1bea..af5bf26 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -47,6 +47,8 @@ struct RAMBlock { * of the postcopy phase */ unsigned long *unsentmap; +/* bitmap of already received pages in postcopy */ +unsigned long *receivedmap; }; static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) @@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) return (char *)block->host + offset; } +static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, +RAMBlock *rb) +{ +uint64_t host_addr_offset = +(uint64_t)(uintptr_t)(host_addr - (void *)rb->host); +return host_addr_offset >> TARGET_PAGE_BITS; +} + long qemu_getrampagesize(void); unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index be497bb..276ce12 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -560,22 +560,27 @@ int 
postcopy_ram_enable_notify(MigrationIncomingState *mis) } static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set(host_addr, rb); } +return ret; } /* @@ -592,7 +597,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -614,7 +619,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(), +rb)) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); diff --git a/migration/ram.c b/migration/ram.c index 9cc1b17..dfbb36b 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -147,6 +147,32 @@ out: return ret; } +void ramblock_recv_map_init(void) +{ +RAMBlock *rb; + +RAMBLOCK_FOREACH(rb) { +assert(!rb->receivedmap); +rb->receivedmap = bitmap_new(rb->max_length >> TARGET_PAGE_BITS); +} +} + +int ramblock_recv_bitmap_test(void *host_addr, RAMBlock *rb) +{ +return test_bit(ramblock_recv_bitmap_offset(host_addr, rb), +rb->receivedmap); +} + +void ramblock_r
[Qemu-devel] [PATCH v7 1/3] migration: postcopy_place_page factoring out
Need to mark copied pages as close as possible to the place where they are tracked down. That will be necessary in a further patch. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Peter Xu <pet...@redhat.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 13 +++-- migration/postcopy-ram.h | 4 ++-- migration/ram.c | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7e21e6f..996e64d 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -564,9 +564,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize) +RAMBlock *rb) { struct uffdio_copy copy_struct; +size_t pagesize = qemu_ram_pagesize(rb); copy_struct.dst = (uint64_t)(uintptr_t)host; copy_struct.src = (uint64_t)(uintptr_t)from; @@ -595,11 +596,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize) + RAMBlock *rb) { trace_postcopy_place_page_zero(host); -if (pagesize == getpagesize()) { +if (qemu_ram_pagesize(rb) == getpagesize()) { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host; zero_struct.range.len = getpagesize(); @@ -629,7 +630,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); } return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, - pagesize); + rb); } return 0; @@ -692,14 +693,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize) +RAMBlock *rb) { assert(0); return -1; } int 
postcopy_place_page_zero(MigrationIncomingState *mis, void *host, -size_t pagesize) +RAMBlock *rb) { assert(0); return -1; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 52d51e8..78a3591 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms, * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize); +RAMBlock *rb); /* * Place a zero page at (host) atomically * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize); + RAMBlock *rb); /* The current postcopy state is read/set by postcopy_state_get/set * which update it atomically. diff --git a/migration/ram.c b/migration/ram.c index 1b08296..9cc1b17 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2470,10 +2470,10 @@ static int ram_load_postcopy(QEMUFile *f) if (all_zero) { ret = postcopy_place_page_zero(mis, place_dest, - block->page_size); + block); } else { ret = postcopy_place_page(mis, place_dest, - place_source, block->page_size); + place_source, block); } } if (!ret) { -- 1.8.3.1
[Qemu-devel] [PATCH v7 2/3] migration: introduce qemu_ufd_copy_ioctl helper
Just for placing auxiliary operations inside a helper; auxiliary operations like: tracking received pages, notifying about the copying operation in further patches. Reviewed-by: Juan Quintela <quint...@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 34 +- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 996e64d..be497bb 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -559,6 +559,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return 0; } +static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, +void *from_addr, uint64_t pagesize) +{ +if (from_addr) { +struct uffdio_copy copy_struct; +copy_struct.dst = (uint64_t)(uintptr_t)host_addr; +copy_struct.src = (uint64_t)(uintptr_t)from_addr; +copy_struct.len = pagesize; +copy_struct.mode = 0; +return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct); +} else { +struct uffdio_zeropage zero_struct; +zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; +zero_struct.range.len = pagesize; +zero_struct.mode = 0; +return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct); +} +} + /* * Place a host page (from) at (host) atomically * returns 0 on success @@ -566,20 +585,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, RAMBlock *rb) { -struct uffdio_copy copy_struct; size_t pagesize = qemu_ram_pagesize(rb); -copy_struct.dst = (uint64_t)(uintptr_t)host; -copy_struct.src = (uint64_t)(uintptr_t)from; -copy_struct.len = pagesize; -copy_struct.mode = 0; - /* copy also acks to the kernel waking the stalled thread up * TODO: We can inhibit that ack and only do it if it was requested * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (ioctl(mis->userfault_fd, UFFDIO_COPY, _struct)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -601,12 +614,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -struct uffdio_zeropage zero_struct; -zero_struct.range.start = (uint64_t)(uintptr_t)host; -zero_struct.range.len = getpagesize(); -zero_struct.mode = 0; - -if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, _struct)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); -- 1.8.3.1
[Qemu-devel] [PATCH v6 3/3] migration: add bitmap for received page
This patch adds the ability to track down already received pages; it's necessary for calculating vCPU block time in the postcopy migration feature, and maybe for restore after a postcopy migration failure. Also it's necessary to solve the shared memory issue in postcopy live migration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. The fallocate syscall is required for remapped shared memory, because remapping itself blocks ioctl(UFFDIO_COPY); the ioctl in this case will end with an EEXIST error (the struct page still exists after the remap). The bitmap is placed into RAMBlock like the other postcopy/precopy related bitmaps. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- include/exec/ram_addr.h | 10 ++ migration/migration.c| 1 + migration/postcopy-ram.c | 16 +++- migration/ram.c | 42 +++--- migration/ram.h | 6 ++ 5 files changed, 67 insertions(+), 8 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 140efa8..4170656 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -47,6 +47,8 @@ struct RAMBlock { * of the postcopy phase */ unsigned long *unsentmap; +/* bitmap of already received pages in postcopy */ +unsigned long *receivedmap; }; static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) @@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) return (char *)block->host + offset; } +static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, +RAMBlock *rb) +{ +uint64_t host_addr_offset = +(uint64_t)(uintptr_t)(host_addr - (void *)rb->host); +return host_addr_offset >> TARGET_PAGE_BITS; +} + long qemu_getrampagesize(void); unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, diff --git a/migration/migration.c b/migration/migration.c index 71e38bc..63ded8c 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -363,6 
+363,7 @@ void migration_fd_process_incoming(QEMUFile *f) Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, f); qemu_file_set_blocking(f, false); +ramblock_recv_map_init(); qemu_coroutine_enter(co); } diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 293db97..f980d93 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -562,22 +562,27 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set(host_addr, rb); } +return ret; } /* @@ -594,7 +599,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -616,7 +621,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(), +rb)) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); diff --git a/migration/ram.c b/migration/ram.c index f50479d..95962a0 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -151,6 +151,32 @@ out: return ret; } +void
[Qemu-devel] [PATCH v6 1/3] migration: postcopy_place_page factoring out
Need to mark copied pages as close as possible to the place where they are tracked down. That will be necessary in a further patch. Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Peter Xu <pet...@redhat.com> Reviewed-by: Juan Quintela <quint...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 13 +++-- migration/postcopy-ram.h | 4 ++-- migration/ram.c | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index c8c4500..dae41b5 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -566,9 +566,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize) +RAMBlock *rb) { struct uffdio_copy copy_struct; +size_t pagesize = qemu_ram_pagesize(rb); copy_struct.dst = (uint64_t)(uintptr_t)host; copy_struct.src = (uint64_t)(uintptr_t)from; @@ -597,11 +598,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize) + RAMBlock *rb) { trace_postcopy_place_page_zero(host); -if (pagesize == getpagesize()) { +if (qemu_ram_pagesize(rb) == getpagesize()) { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host; zero_struct.range.len = getpagesize(); @@ -631,7 +632,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); } return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, - pagesize); + rb); } return 0; @@ -694,14 +695,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize) +RAMBlock *rb) { assert(0); return -1; } int 
postcopy_place_page_zero(MigrationIncomingState *mis, void *host, -size_t pagesize) +RAMBlock *rb) { assert(0); return -1; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 52d51e8..78a3591 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms, * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, -size_t pagesize); +RAMBlock *rb); /* * Place a zero page at (host) atomically * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize); + RAMBlock *rb); /* The current postcopy state is read/set by postcopy_state_get/set * which update it atomically. diff --git a/migration/ram.c b/migration/ram.c index 8dbdfdb..f50479d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2465,10 +2465,10 @@ static int ram_load_postcopy(QEMUFile *f) if (all_zero) { ret = postcopy_place_page_zero(mis, place_dest, - block->page_size); + block); } else { ret = postcopy_place_page(mis, place_dest, - place_source, block->page_size); + place_source, block); } } if (!ret) { -- 1.8.3.1
[Qemu-devel] [PATCH v6 0/3] Add bitmap for received pages in postcopy migration
This is the 6th version of [PATCH v1 0/2] Add bitmap for copied pages in postcopy migration cover message from there This is a separate patch set; it is derived from https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html There are several possible use cases: 1. solve issue with postcopy live migration and shared memory. OVS-VSWITCH requires information about copied pages, to fallocate newly allocated pages. 2. calculation of vCPU blocktime; for more details see https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html 3. Recovery after a failure in the middle of postcopy migration Declaration is placed in two places include/migration/migration.h and into migration/postcopy-ram.h, because some functions are required in virtio and into public function include/exec/ram_addr.h. V5 -> V6 - call ramblock_recv_map_init from migration_fd_process_incoming (Peter suggested). But finalization is still in ram_load_cleanup as Juan suggested. V4 -> V5 - remove ramblock_recv_bitmap_clear_range in favor of bitmap_clear (comment from David) - single invocation place for ramblock_recv_bitmap_set (comment from Peter) - minor changes like removing comment from qemu_ufd_copy_ioctl and local variable from ramblock_recv_map_init (comment from Peter) V3 -> V4 - clear_bit instead of ramblock_recv_bitmap_clear in ramblock_recv_bitmap_clear_range, it reduced number of operations (comment from Juan) - for postcopy ramblock_recv_bitmap_set is called after the page was copied, only in case of success (comment from David) - indentation fixes (comment from Juan) V2 -> V3 - ramblock_recv_map_init call is placed into migration_incoming_get_current, looks like it's a general place for both precopy and postcopy cases. - received bitmap memory releasing is placed into ram_load_cleanup; unfortunately, it is called only in case of precopy. 
- precopy case and discard ram block case - function renaming, and another minor cleanups V1 -> V2 - change in terminology s/copied/received/g - granularity became TARGET_PAGE_SIZE, but not actual page size of the ramblock - movecopiedmap & get_copiedmap_size were removed, until patch set where it will be necessary - releasing memory of receivedmap was added into ram_load_cleanup - new patch "migration: introduce qemu_ufd_copy_ioctl helper" Patchset is based on Juan's patchset: [PATCH v2 0/5] Create setup/cleanup methods for migration incoming side Alexey Perevalov (3): migration: postcopy_place_page factoring out migration: introduce qemu_ufd_copy_ioctl helper migration: add bitmap for received page include/exec/ram_addr.h | 10 + migration/migration.c| 1 + migration/postcopy-ram.c | 53 +++- migration/postcopy-ram.h | 4 ++-- migration/ram.c | 46 - migration/ram.h | 6 ++ 6 files changed, 94 insertions(+), 26 deletions(-) -- 1.8.3.1
[Qemu-devel] [PATCH v6 2/3] migration: introduce qemu_ufd_copy_ioctl helper
Just for placing auxiliary operations inside a helper; auxiliary operations like: tracking received pages, notifying about the copying operation in further patches. Reviewed-by: Juan Quintela <quint...@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Reviewed-by: Peter Xu <pet...@redhat.com> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- migration/postcopy-ram.c | 34 +- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index dae41b5..293db97 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -561,6 +561,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return 0; } +static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, +void *from_addr, uint64_t pagesize) +{ +if (from_addr) { +struct uffdio_copy copy_struct; +copy_struct.dst = (uint64_t)(uintptr_t)host_addr; +copy_struct.src = (uint64_t)(uintptr_t)from_addr; +copy_struct.len = pagesize; +copy_struct.mode = 0; +return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct); +} else { +struct uffdio_zeropage zero_struct; +zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; +zero_struct.range.len = pagesize; +zero_struct.mode = 0; +return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct); +} +} + /* * Place a host page (from) at (host) atomically * returns 0 on success @@ -568,20 +587,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, RAMBlock *rb) { -struct uffdio_copy copy_struct; size_t pagesize = qemu_ram_pagesize(rb); -copy_struct.dst = (uint64_t)(uintptr_t)host; -copy_struct.src = (uint64_t)(uintptr_t)from; -copy_struct.len = pagesize; -copy_struct.mode = 0; - /* copy also acks to the kernel waking the stalled thread up * TODO: We can inhibit that ack and only do it if it was requested * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (ioctl(mis->userfault_fd, UFFDIO_COPY, _struct)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -603,12 +616,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -struct uffdio_zeropage zero_struct; -zero_struct.range.start = (uint64_t)(uintptr_t)host; -zero_struct.range.len = getpagesize(); -zero_struct.mode = 0; - -if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, _struct)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); -- 1.8.3.1
[Qemu-devel] [PATCH v5 3/3] migration: add bitmap for received page
This patch adds the ability to track down already received pages; it's necessary for calculating vCPU block time in the postcopy migration feature, and maybe for restore after a postcopy migration failure. Also it's necessary to solve the shared memory issue in postcopy live migration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. The fallocate syscall is required for remapped shared memory, because remapping itself blocks ioctl(UFFDIO_COPY); the ioctl in this case will end with an EEXIST error (the struct page still exists after the remap). The bitmap is placed into RAMBlock like the other postcopy/precopy related bitmaps. Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com> --- include/exec/ram_addr.h | 10 ++ migration/migration.c| 1 + migration/postcopy-ram.c | 16 +++- migration/ram.c | 42 +++--- migration/ram.h | 6 ++ 5 files changed, 67 insertions(+), 8 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 140efa8..4170656 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -47,6 +47,8 @@ struct RAMBlock { * of the postcopy phase */ unsigned long *unsentmap; +/* bitmap of already received pages in postcopy */ +unsigned long *receivedmap; }; static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) @@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) return (char *)block->host + offset; } +static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, +RAMBlock *rb) +{ +uint64_t host_addr_offset = +(uint64_t)(uintptr_t)(host_addr - (void *)rb->host); +return host_addr_offset >> TARGET_PAGE_BITS; +} + long qemu_getrampagesize(void); unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, diff --git a/migration/migration.c b/migration/migration.c index 71e38bc..53fbd41 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -143,6 
+143,7 @@ MigrationIncomingState *migration_incoming_get_current(void) qemu_mutex_init(_current.rp_mutex); qemu_event_init(_current.main_thread_load_event, false); once = true; +ramblock_recv_map_init(); } return _current; } diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 293db97..f980d93 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -562,22 +562,27 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, -void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { +int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_COPY, _struct); +ret = ioctl(userfault_fd, UFFDIO_COPY, _struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; -return ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, _struct); +} +if (!ret) { +ramblock_recv_bitmap_set(host_addr, rb); } +return ret; } /* @@ -594,7 +599,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. 
*/ -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -616,7 +621,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { -if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { +if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(), +rb)) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); diff --git a/migration/ram.c b/migration/ram.c index f50479d..95962a0 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -151,6 +151,32 @@ out: