[Qemu-devel] [PATCH v2 0/6] postcopy block time calculation + ppc32 build fix

2018-03-22 Thread Alexey Perevalov
V1->V2
- the __nocheck atomic variants that accidentally appeared after the rebase are removed
- this patch set is also rebased on top of the latest pull request

This patch set includes the patches that were reverted by commit
ee86981bd due to a build problem on 32-bit PowerPC/ARM architectures.
It also includes the build-fix patch
([PATCH v4] migration: change blocktime type to uint32_t), but that
patch was merged into:
migration: add postcopy blocktime ctx into MigrationIncomingState
migration: calculate vCPU blocktime on dst side
migration: add postcopy total blocktime into query-migrate


based on
commit c6740fc88ecd8f5cf3cf3185ee112c3eea41caa2
"hw/rdma: Implementation of PVRDMA device"

Alexey Perevalov (6):
  migration: introduce postcopy-blocktime capability
  migration: add postcopy blocktime ctx into MigrationIncomingState
  migration: calculate vCPU blocktime on dst side
  migration: postcopy_blocktime documentation
  migration: add blocktime calculation into migration-test
  migration: add postcopy total blocktime into query-migrate

 docs/devel/migration.rst |  14 +++
 hmp.c                    |  15 +++
 migration/migration.c    |  51 -
 migration/migration.h    |  13 +++
 migration/postcopy-ram.c | 268 ++-
 migration/trace-events   |   6 +-
 qapi/migration.json      |  17 ++-
 tests/migration-test.c   |  16 +++
 8 files changed, 392 insertions(+), 8 deletions(-)

-- 
2.7.4




[Qemu-devel] [PATCH v2 2/6] migration: add postcopy blocktime ctx into MigrationIncomingState

2018-03-22 Thread Alexey Perevalov
This patch adds request to kernel space for UFFD_FEATURE_THREAD_ID, in
case this feature is provided by kernel.

PostcopyBlocktimeContext is encapsulated inside postcopy-ram.c,
since it is a postcopy-only feature. That also bounds the lifetime
of a PostcopyBlocktimeContext instance. Information from the instance
is needed well after the postcopy migration ends, so the instance
lives until QEMU exits, but the parts of it used only during the
calculation (vcpu_addr, page_fault_vcpu_time) are released when
postcopy ends or fails.

To enable postcopy blocktime calculation on the destination, the
proper capability has to be requested (the patch for documentation
is at the tail of the patch set).

As an example, the following command enables that capability, assuming
QEMU was started with the
-chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock
option to control it:

[root@host]# printf "{\"execute\" : \"qmp_capabilities\"}\r\n \
{\"execute\": \"migrate-set-capabilities\" , \"arguments\":   {
\"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\":
true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock

Or just with HMP
(qemu) migrate_set_capability postcopy-blocktime on
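Either interface acknowledges success; a hedged sketch of the expected
QMP exchange (the empty-object reply is QMP's usual success convention,
shown here purely for illustration):

{"execute": "migrate-set-capabilities", "arguments": {
"capabilities": [ { "capability": "postcopy-blocktime", "state": true } ] } }
{"return": {}}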

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 migration/migration.h|  8 +++
 migration/postcopy-ram.c | 61 
 2 files changed, 69 insertions(+)

diff --git a/migration/migration.h b/migration/migration.h
index 46a50bc..6d9aaeb 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -22,6 +22,8 @@
 #include "hw/qdev.h"
 #include "io/channel.h"
 
+struct PostcopyBlocktimeContext;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
 QEMUFile *from_src_file;
@@ -65,6 +67,12 @@ struct MigrationIncomingState {
 /* The coroutine we should enter (back) after failover */
 Coroutine *migration_incoming_co;
 QemuSemaphore colo_incoming_sem;
+
+/*
+ * PostcopyBlocktimeContext to keep information for postcopy
+ * live migration, to calculate vCPU block time
+ * */
+struct PostcopyBlocktimeContext *blocktime_ctx;
 };
 
 MigrationIncomingState *migration_incoming_get_current(void);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index efd7793..66f1df9 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -90,6 +90,54 @@ int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp)
 #include 
 #include 
 
+typedef struct PostcopyBlocktimeContext {
+/* time when page fault initiated per vCPU */
+uint32_t *page_fault_vcpu_time;
+/* page address per vCPU */
+uintptr_t *vcpu_addr;
+uint32_t total_blocktime;
+/* blocktime per vCPU */
+uint32_t *vcpu_blocktime;
+/* point in time when last page fault was initiated */
+uint32_t last_begin;
+/* number of vCPUs suspended */
+int smp_cpus_down;
+uint64_t start_time;
+
+/*
+ * Handler for exit event, necessary for
+ * releasing whole blocktime_ctx
+ */
+Notifier exit_notifier;
+} PostcopyBlocktimeContext;
+
+static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx)
+{
+g_free(ctx->page_fault_vcpu_time);
+g_free(ctx->vcpu_addr);
+g_free(ctx->vcpu_blocktime);
+g_free(ctx);
+}
+
+static void migration_exit_cb(Notifier *n, void *data)
+{
+PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext,
+ exit_notifier);
+destroy_blocktime_context(ctx);
+}
+
+static struct PostcopyBlocktimeContext *blocktime_context_new(void)
+{
+PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
+ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
+ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
+
+ctx->exit_notifier.notify = migration_exit_cb;
+ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+qemu_add_exit_notifier(&ctx->exit_notifier);
+return ctx;
+}
 
 /**
  * receive_ufd_features: check userfault fd features, to request only supported
@@ -182,6 +230,19 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
 }
 }
 
+#ifdef UFFD_FEATURE_THREAD_ID
+if (migrate_postcopy_blocktime() && mis &&
+UFFD_FEATURE_THREAD_ID & supported_features) {
+/* kernel supports that feature */
+/* don't create blocktime_context if it exists */
+if (!mis->blocktime_ctx) {
+mis->blocktime_ctx = blocktime_context_new();
+}
+

[Qemu-devel] [PATCH v2 6/6] migration: add postcopy total blocktime into query-migrate

2018-03-22 Thread Alexey Perevalov
Postcopy total blocktime is available on the destination side only,
but query-migrate was possible only on the source. This patch
adds the ability to call query-migrate on the destination as well.
To be able to see postcopy blocktime, the postcopy-blocktime
capability needs to be requested.

The query-migrate command will show the following sample result:
{"return": {
    "postcopy-vcpu-blocktime": [115, 100],
    "status": "completed",
    "postcopy-blocktime": 100
}}

postcopy_vcpu_blocktime contains a list, where the first item
corresponds to the first vCPU in QEMU.

This patch has a drawback: it combines the states of incoming and
outgoing migration, and an ongoing outgoing migration will overwrite
the incoming state. It may be better to separate query-migrate for
incoming and outgoing migration, or to add a parameter indicating the
type of migration.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 hmp.c                    | 15 +++
 migration/migration.c    | 42 
 migration/migration.h    |  4 
 migration/postcopy-ram.c | 56 
 migration/trace-events   |  1 +
 qapi/migration.json      | 11 +-
 6 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/hmp.c b/hmp.c
index 679467d..6c51df5 100644
--- a/hmp.c
+++ b/hmp.c
@@ -274,6 +274,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_postcopy_blocktime) {
+monitor_printf(mon, "postcopy blocktime: %u\n",
+   info->postcopy_blocktime);
+}
+
+if (info->has_postcopy_vcpu_blocktime) {
+Visitor *v;
+char *str;
+v = string_output_visitor_new(false, &str);
+visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_complete(v, &str);
+monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
+g_free(str);
+visit_free(v);
+}
 qapi_free_MigrationInfo(info);
 qapi_free_MigrationCapabilityStatusList(caps);
 }
diff --git a/migration/migration.c b/migration/migration.c
index f95a7f3..71b0f19 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -630,14 +630,15 @@ static void populate_disk_info(MigrationInfo *info)
 }
 }
 
-MigrationInfo *qmp_query_migrate(Error **errp)
+static void fill_source_migration_info(MigrationInfo *info)
 {
-MigrationInfo *info = g_malloc0(sizeof(*info));
 MigrationState *s = migrate_get_current();
 
 switch (s->state) {
 case MIGRATION_STATUS_NONE:
 /* no migration has happened ever */
+/* do not overwrite destination migration status */
+return;
 break;
 case MIGRATION_STATUS_SETUP:
 info->has_status = true;
@@ -688,8 +689,6 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 break;
 }
 info->status = s->state;
-
-return info;
 }
 
 /**
@@ -753,6 +752,41 @@ static bool migrate_caps_check(bool *cap_list,
 return true;
 }
 
+static void fill_destination_migration_info(MigrationInfo *info)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+
+switch (mis->state) {
+case MIGRATION_STATUS_NONE:
+return;
+break;
+case MIGRATION_STATUS_SETUP:
+case MIGRATION_STATUS_CANCELLING:
+case MIGRATION_STATUS_CANCELLED:
+case MIGRATION_STATUS_ACTIVE:
+case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+case MIGRATION_STATUS_FAILED:
+case MIGRATION_STATUS_COLO:
+info->has_status = true;
+break;
+case MIGRATION_STATUS_COMPLETED:
+info->has_status = true;
+fill_destination_postcopy_migration_info(info);
+break;
+}
+info->status = mis->state;
+}
+
+MigrationInfo *qmp_query_migrate(Error **errp)
+{
+MigrationInfo *info = g_malloc0(sizeof(*info));
+
+fill_destination_migration_info(info);
+fill_source_migration_info(info);
+
+return info;
+}
+
 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
   Error **errp)
 {
diff --git a/migration/migration.h b/migration/migration.h
index 6d9aaeb..7c69598 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -77,6 +77,10 @@ struct MigrationIncomingState {
 
 MigrationIncomingState *migration_incoming_get_current(void);
 void migration_incoming_state_destroy(void);
+/*
+ * Functions to work with blocktime context
+ */
+void fill_destination_postcopy_migration_info(MigrationInfo *info);
 
 #define TYPE_MIGRATION "migration"
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 6b01884..bbc1a95 100644
--- a/migration/postcopy-ram.c
+++ b/migration

[Qemu-devel] [PATCH v2 1/6] migration: introduce postcopy-blocktime capability

2018-03-22 Thread Alexey Perevalov
Right now it can be used on the destination side to
enable vCPU blocktime calculation for postcopy live migration.
vCPU blocktime is the time from when a vCPU thread was put into
interruptible sleep until the memory page was copied and the thread woken.
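For reference, whether the capability took effect can be checked over
QMP with query-migrate-capabilities; a minimal sketch of the exchange,
with the reply abridged to the relevant entry:

{"execute": "query-migrate-capabilities"}
{"return": [ ..., { "state": true, "capability": "postcopy-blocktime" }, ... ]}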

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 migration/migration.c | 9 +
 migration/migration.h | 1 +
 qapi/migration.json   | 6 +-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index fc629e5..f95a7f3 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1540,6 +1540,15 @@ bool migrate_zero_blocks(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
 }
 
+bool migrate_postcopy_blocktime(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
+}
+
 bool migrate_use_compression(void)
 {
 MigrationState *s;
diff --git a/migration/migration.h b/migration/migration.h
index 8d2f320..46a50bc 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -230,6 +230,7 @@ int migrate_compress_level(void);
 int migrate_compress_threads(void);
 int migrate_decompress_threads(void);
 bool migrate_use_events(void);
+bool migrate_postcopy_blocktime(void);
 
 /* Sending on the return path - generic and then for each message type */
 void migrate_send_rp_shut(MigrationIncomingState *mis,
diff --git a/qapi/migration.json b/qapi/migration.json
index 9d0bf82..24bfc19 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -354,16 +354,20 @@
 #
 # @x-multifd: Use more than one fd for migration (since 2.11)
 #
+#
 # @dirty-bitmaps: If enabled, QEMU will migrate named dirty bitmaps.
 # (since 2.12)
 #
+# @postcopy-blocktime: Calculate downtime for postcopy live migration
+# (since 2.13)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
'block', 'return-path', 'pause-before-switchover', 'x-multifd',
-   'dirty-bitmaps' ] }
+   'dirty-bitmaps', 'postcopy-blocktime' ] }
 
 ##
 # @MigrationCapabilityStatus:
-- 
2.7.4




[Qemu-devel] [PATCH v2 4/6] migration: postcopy_blocktime documentation

2018-03-22 Thread Alexey Perevalov
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 docs/devel/migration.rst | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst
index e32b087..9342a8a 100644
--- a/docs/devel/migration.rst
+++ b/docs/devel/migration.rst
@@ -401,6 +401,20 @@ will now cause the transition from precopy to postcopy.
 It can be issued immediately after migration is started or any
 time later on.  Issuing it after the end of a migration is harmless.
 
+Blocktime is a postcopy live migration metric, intended to show how
+long the vCPU was in state of interruptible sleep due to pagefault.
+That metric is calculated both for all vCPUs as an overlapped value, and
+separately for each vCPU. These values are calculated on the destination
+side.  To enable postcopy blocktime calculation, enter the following
+command on the destination monitor:
+
+``migrate_set_capability postcopy-blocktime on``
+
+Postcopy blocktime can be retrieved by the query-migrate qmp command.
+The postcopy-blocktime value of the qmp command will show the overlapped
+blocking time for all vCPUs, and postcopy-vcpu-blocktime will show the
+list of blocking times per vCPU.
+
 .. note::
   During the postcopy phase, the bandwidth limits set using
   ``migrate_set_speed`` is ignored (to avoid delaying requested pages that
-- 
2.7.4
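For orientation, a hedged sketch of what the destination monitor could
print once this capability is on and postcopy has completed (the layout
follows the hmp.c hunk in patch 6/6; the numbers are purely illustrative):

(qemu) info migrate
...
postcopy blocktime: 100
postcopy vcpu blocktime: 115,100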




[Qemu-devel] [PATCH v2 3/6] migration: calculate vCPU blocktime on dst side

2018-03-22 Thread Alexey Perevalov
This patch provides blocktime calculation per vCPU,
as a summary and as an overlapped value for all vCPUs.

This approach was suggested by Peter Xu, as an improvement over the
previous approach, where QEMU kept a tree with the faulted page address
and a bitmask of CPUs in it. Now QEMU keeps an array with the faulted
page address as the value and the vCPU as the index. That helps to find
the proper vCPU at UFFD_COPY time. It also keeps a list of blocktimes
per vCPU (which can be traced with page_fault_addr).

Blocktime will not be calculated if the postcopy_blocktime field of
MigrationIncomingState wasn't initialized.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 migration/postcopy-ram.c | 151 ++-
 migration/trace-events   |   5 +-
 2 files changed, 154 insertions(+), 2 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 66f1df9..6b01884 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -636,6 +636,148 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
 return 0;
 }
 
+static int get_mem_fault_cpu_index(uint32_t pid)
+{
+CPUState *cpu_iter;
+
+CPU_FOREACH(cpu_iter) {
+if (cpu_iter->thread_id == pid) {
+trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid);
+return cpu_iter->cpu_index;
+}
+}
+trace_get_mem_fault_cpu_index(-1, pid);
+return -1;
+}
+
+static uint32_t get_low_time_offset(PostcopyBlocktimeContext *dc)
+{
+int64_t start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+dc->start_time;
+return start_time_offset < 1 ? 1 : start_time_offset & UINT32_MAX;
+}
+
+/*
+ * This function is being called when pagefault occurs. It
+ * tracks down vCPU blocking time.
+ *
+ * @addr: faulted host virtual address
+ * @ptid: faulted process thread id
+ * @rb: ramblock appropriate to addr
+ */
+static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
+  RAMBlock *rb)
+{
+int cpu, already_received;
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+uint32_t low_time_offset;
+
+if (!dc || ptid == 0) {
+return;
+}
+cpu = get_mem_fault_cpu_index(ptid);
+if (cpu < 0) {
+return;
+}
+
+low_time_offset = get_low_time_offset(dc);
+if (dc->vcpu_addr[cpu] == 0) {
+atomic_inc(&dc->smp_cpus_down);
+}
+
+atomic_xchg(&dc->last_begin, low_time_offset);
+atomic_xchg(&dc->page_fault_vcpu_time[cpu], low_time_offset);
+atomic_xchg(&dc->vcpu_addr[cpu], addr);
+
+/* check it here, not at the beginning of the function,
+ * because the check could occur earlier than bitmap_set in
+ * qemu_ufd_copy_ioctl */
+already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
+if (already_received) {
+atomic_xchg(&dc->vcpu_addr[cpu], 0);
+atomic_xchg(&dc->page_fault_vcpu_time[cpu], 0);
+atomic_dec(&dc->smp_cpus_down);
+}
+trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
+cpu, already_received);
+}
+
+/*
+ *  This function just provides the calculated blocktime per cpu and traces it.
+ *  Total blocktime is calculated in mark_postcopy_blocktime_end.
+ *
+ *
+ * Assume we have 3 CPUs
+ *
+ *      S1        E1           S1               E1
+ * -----***********------------xxx***************------------------------> CPU1
+ *
+ *             S2                E2
+ * ------------****************xxx---------------------------------------> CPU2
+ *
+ *                         S3            E3
+ * ------------------------****xxx********-------------------------------> CPU3
+ *
+ * We have the sequence S1,S2,E1,S3,S1,E2,E3,E1
+ * S2,E1 - doesn't match the condition, because the sequence S1,S2,E1
+ *         doesn't include CPU3
+ * S3,S1,E2 - this sequence includes all CPUs, so the overlap is S1,E2 -
+ *            it's a part of total blocktime.
+ * S1 - here is last_begin
+ * The legend of the picture is the following:
+ *  * - means blocktime per vCPU
+ *  x - means overlapped blocktime (total blocktime)
+ *
+ * @addr: host virtual address
+ */
+static void mark_postcopy_blocktime_end(uintptr_t addr)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int i, affected_cpu = 0;
+bool vcpu_total_blocktime = false;
+uint32_t read_vcpu_time, low_time_offset;
+
+if (!dc) {
+return;
+}
+
+low_time_offset = get_low_time_offset(dc);
+/* lookup cpu, to clear it,
+ * that algorithm looks straighforward, but it's not
+ * op
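(The hunk is truncated here in the archive. For orientation, below is a
hedged, self-contained C sketch of the end-side accounting that the
comments above describe - per-vCPU blocktime plus the overlapped total;
the names are illustrative and the details are an approximate
reconstruction, not the literal remainder of the diff.)

#include <stdint.h>

enum { NCPUS = 3 };
static uint32_t fault_start[NCPUS];    /* 0 = vCPU not blocked */
static uintptr_t fault_addr[NCPUS];    /* page the vCPU faulted on */
static uint32_t vcpu_blocktime[NCPUS]; /* "*" intervals in the diagram */
static uint32_t total_blocktime;       /* "x" (overlapped) intervals */
static uint32_t last_begin;            /* time of the latest S event */
static int cpus_down;

static void fault_begin(int cpu, uintptr_t addr, uint32_t now)
{
    if (fault_addr[cpu] == 0) {
        cpus_down++;
    }
    last_begin = now;
    fault_start[cpu] = now;
    fault_addr[cpu] = addr;
}

static void fault_end(uintptr_t addr, uint32_t now)
{
    int i, affected = 0;
    int all_down = (cpus_down == NCPUS);

    for (i = 0; i < NCPUS; i++) {
        /* one copied page may unblock several vCPUs */
        if (fault_addr[i] != addr || fault_start[i] == 0) {
            continue;
        }
        fault_addr[i] = 0;
        vcpu_blocktime[i] += now - fault_start[i];
        fault_start[i] = 0;
        affected++;
    }
    cpus_down -= affected;
    if (all_down && affected > 0) {
        /* every vCPU was blocked: the overlap ran from the latest
         * begin (S1 in the diagram) up to this end event */
        total_blocktime += now - last_begin;
    }
}

With the diagram's S3,S1,E2 step, fault_end at E2 sees all three vCPUs
down and adds (E2 - S1) to total_blocktime, while each vCPU is still
credited its own full blocking interval.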

[Qemu-devel] [PATCH v2 5/6] migration: add blocktime calculation into migration-test

2018-03-22 Thread Alexey Perevalov
This patch just requests blocktime calculation,
and checks it in the case when the UFFD_FEATURE_THREAD_ID feature is
set on the host.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 tests/migration-test.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/tests/migration-test.c b/tests/migration-test.c
index 422bf1a..dde7c46 100644
--- a/tests/migration-test.c
+++ b/tests/migration-test.c
@@ -26,6 +26,7 @@
 const unsigned start_address = 1024 * 1024;
 const unsigned end_address = 100 * 1024 * 1024;
 bool got_stop;
+static bool uffd_feature_thread_id;
 
 #if defined(__linux__)
 #include 
@@ -55,6 +56,7 @@ static bool ufd_version_check(void)
 g_test_message("Skipping test: UFFDIO_API failed");
 return false;
 }
+uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID;
 
 ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
  (__u64)1 << _UFFDIO_UNREGISTER;
@@ -223,6 +225,16 @@ static uint64_t get_migration_pass(QTestState *who)
 return result;
 }
 
+static void read_blocktime(QTestState *who)
+{
+QDict *rsp, *rsp_return;
+
+rsp = wait_command(who, "{ 'execute': 'query-migrate' }");
+rsp_return = qdict_get_qdict(rsp, "return");
+g_assert(qdict_haskey(rsp_return, "postcopy-blocktime"));
+QDECREF(rsp);
+}
+
 static void wait_for_migration_complete(QTestState *who)
 {
 while (true) {
@@ -533,6 +545,7 @@ static void test_migrate(void)
 
 migrate_set_capability(from, "postcopy-ram", "true");
 migrate_set_capability(to, "postcopy-ram", "true");
+migrate_set_capability(to, "postcopy-blocktime", "true");
 
 /* We want to pick a speed slow enough that the test completes
  * quickly, but that it doesn't complete precopy even on a slow
@@ -559,6 +572,9 @@ static void test_migrate(void)
 wait_for_serial("dest_serial");
 wait_for_migration_complete(from);
 
+if (uffd_feature_thread_id) {
+read_blocktime(to);
+}
 g_free(uri);
 
 test_migrate_end(from, to, true);
-- 
2.7.4




[Qemu-devel] [PATCH v1 6/6] migration: add postcopy total blocktime into query-migrate

2018-03-13 Thread Alexey Perevalov
Postcopy total blocktime is available on the destination side only,
but query-migrate was possible only on the source. This patch
adds the ability to call query-migrate on the destination as well.
To be able to see postcopy blocktime, the postcopy-blocktime
capability needs to be requested.

The query-migrate command will show the following sample result:
{"return": {
    "postcopy-vcpu-blocktime": [115, 100],
    "status": "completed",
    "postcopy-blocktime": 100
}}

postcopy_vcpu_blocktime contains a list, where the first item
corresponds to the first vCPU in QEMU.

This patch has a drawback: it combines the states of incoming and
outgoing migration, and an ongoing outgoing migration will overwrite
the incoming state. It may be better to separate query-migrate for
incoming and outgoing migration, or to add a parameter indicating the
type of migration.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 hmp.c                    | 15 +++
 migration/migration.c    | 42 
 migration/migration.h    |  4 
 migration/postcopy-ram.c | 56 
 migration/trace-events   |  1 +
 qapi/migration.json      | 11 +-
 6 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/hmp.c b/hmp.c
index 016cb5c..4539f77 100644
--- a/hmp.c
+++ b/hmp.c
@@ -274,6 +274,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_postcopy_blocktime) {
+monitor_printf(mon, "postcopy blocktime: %u\n",
+   info->postcopy_blocktime);
+}
+
+if (info->has_postcopy_vcpu_blocktime) {
+Visitor *v;
+char *str;
+v = string_output_visitor_new(false, &str);
+visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_complete(v, &str);
+monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
+g_free(str);
+visit_free(v);
+}
 qapi_free_MigrationInfo(info);
 qapi_free_MigrationCapabilityStatusList(caps);
 }
diff --git a/migration/migration.c b/migration/migration.c
index 0ee9c1f..ae8890e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -619,14 +619,15 @@ static void populate_disk_info(MigrationInfo *info)
 }
 }
 
-MigrationInfo *qmp_query_migrate(Error **errp)
+static void fill_source_migration_info(MigrationInfo *info)
 {
-MigrationInfo *info = g_malloc0(sizeof(*info));
 MigrationState *s = migrate_get_current();
 
 switch (s->state) {
 case MIGRATION_STATUS_NONE:
 /* no migration has happened ever */
+/* do not overwrite destination migration status */
+return;
 break;
 case MIGRATION_STATUS_SETUP:
 info->has_status = true;
@@ -677,8 +678,6 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 break;
 }
 info->status = s->state;
-
-return info;
 }
 
 /**
@@ -742,6 +741,41 @@ static bool migrate_caps_check(bool *cap_list,
 return true;
 }
 
+static void fill_destination_migration_info(MigrationInfo *info)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+
+switch (mis->state) {
+case MIGRATION_STATUS_NONE:
+return;
+break;
+case MIGRATION_STATUS_SETUP:
+case MIGRATION_STATUS_CANCELLING:
+case MIGRATION_STATUS_CANCELLED:
+case MIGRATION_STATUS_ACTIVE:
+case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+case MIGRATION_STATUS_FAILED:
+case MIGRATION_STATUS_COLO:
+info->has_status = true;
+break;
+case MIGRATION_STATUS_COMPLETED:
+info->has_status = true;
+fill_destination_postcopy_migration_info(info);
+break;
+}
+info->status = mis->state;
+}
+
+MigrationInfo *qmp_query_migrate(Error **errp)
+{
+MigrationInfo *info = g_malloc0(sizeof(*info));
+
+fill_destination_migration_info(info);
+fill_source_migration_info(info);
+
+return info;
+}
+
 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
   Error **errp)
 {
diff --git a/migration/migration.h b/migration/migration.h
index 3ead619..a09277e 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -73,6 +73,10 @@ struct MigrationIncomingState {
 
 MigrationIncomingState *migration_incoming_get_current(void);
 void migration_incoming_state_destroy(void);
+/*
+ * Functions to work with blocktime context
+ */
+void fill_destination_postcopy_migration_info(MigrationInfo *info);
 
 #define TYPE_MIGRATION "migration"
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 9e51e84..c46225c 100644
--- a/migration/postcopy-ram.c
+++ b/migration

[Qemu-devel] [PATCH v1 4/6] migration: postcopy_blocktime documentation

2018-03-13 Thread Alexey Perevalov
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 docs/devel/migration.rst | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst
index 9d1b765..18cd952 100644
--- a/docs/devel/migration.rst
+++ b/docs/devel/migration.rst
@@ -401,6 +401,20 @@ will now cause the transition from precopy to postcopy.
 It can be issued immediately after migration is started or any
 time later on.  Issuing it after the end of a migration is harmless.
 
+Blocktime is a postcopy live migration metric, intended to show how
+long the vCPU was in state of interruptible sleep due to pagefault.
+That metric is calculated both for all vCPUs as an overlapped value, and
+separately for each vCPU. These values are calculated on the destination
+side.  To enable postcopy blocktime calculation, enter the following
+command on the destination monitor:
+
+``migrate_set_capability postcopy-blocktime on``
+
+Postcopy blocktime can be retrieved by the query-migrate qmp command.
+The postcopy-blocktime value of the qmp command will show the overlapped
+blocking time for all vCPUs, and postcopy-vcpu-blocktime will show the
+list of blocking times per vCPU.
+
 .. note::
   During the postcopy phase, the bandwidth limits set using
   ``migrate_set_speed`` is ignored (to avoid delaying requested pages that
-- 
2.7.4




[Qemu-devel] [PATCH v1 3/6] migration: calculate vCPU blocktime on dst side

2018-03-13 Thread Alexey Perevalov
This patch provides blocktime calculation per vCPU,
as a summary and as an overlapped value for all vCPUs.

This approach was suggested by Peter Xu, as an improvement over the
previous approach, where QEMU kept a tree with the faulted page address
and a bitmask of CPUs in it. Now QEMU keeps an array with the faulted
page address as the value and the vCPU as the index. That helps to find
the proper vCPU at UFFD_COPY time. It also keeps a list of blocktimes
per vCPU (which can be traced with page_fault_addr).

Blocktime will not be calculated if the postcopy_blocktime field of
MigrationIncomingState wasn't initialized.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 migration/postcopy-ram.c | 149 ++-
 migration/trace-events   |   5 +-
 2 files changed, 152 insertions(+), 2 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 9144102..9e51e84 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -546,6 +546,148 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
 return 0;
 }
 
+static int get_mem_fault_cpu_index(uint32_t pid)
+{
+CPUState *cpu_iter;
+
+CPU_FOREACH(cpu_iter) {
+if (cpu_iter->thread_id == pid) {
+trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid);
+return cpu_iter->cpu_index;
+}
+}
+trace_get_mem_fault_cpu_index(-1, pid);
+return -1;
+}
+
+static uint32_t get_low_time_offset(PostcopyBlocktimeContext *dc)
+{
+int64_t start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+dc->start_time;
+return start_time_offset < 1 ? 1 : start_time_offset & UINT32_MAX;
+}
+
+/*
+ * This function is being called when pagefault occurs. It
+ * tracks down vCPU blocking time.
+ *
+ * @addr: faulted host virtual address
+ * @ptid: faulted process thread id
+ * @rb: ramblock appropriate to addr
+ */
+static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
+  RAMBlock *rb)
+{
+int cpu, already_received;
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+uint32_t low_time_offset;
+
+if (!dc || ptid == 0) {
+return;
+}
+cpu = get_mem_fault_cpu_index(ptid);
+if (cpu < 0) {
+return;
+}
+
+low_time_offset = get_low_time_offset(dc);
+if (dc->vcpu_addr[cpu] == 0) {
+atomic_inc(&dc->smp_cpus_down);
+}
+
+atomic_xchg(&dc->last_begin, low_time_offset);
+atomic_xchg(&dc->page_fault_vcpu_time[cpu], low_time_offset);
+atomic_xchg(&dc->vcpu_addr[cpu], addr);
+
+/* check it here, not at the beginning of the function,
+ * because the check could occur earlier than bitmap_set in
+ * qemu_ufd_copy_ioctl */
+already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
+if (already_received) {
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], 0);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], 0);
+atomic_dec(&dc->smp_cpus_down);
+}
+trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
+cpu, already_received);
+}
+
+/*
+ *  This function just provides the calculated blocktime per cpu and traces it.
+ *  Total blocktime is calculated in mark_postcopy_blocktime_end.
+ *
+ *
+ * Assume we have 3 CPUs
+ *
+ *      S1        E1           S1               E1
+ * -----***********------------xxx***************------------------------> CPU1
+ *
+ *             S2                E2
+ * ------------****************xxx---------------------------------------> CPU2
+ *
+ *                         S3            E3
+ * ------------------------****xxx********-------------------------------> CPU3
+ *
+ * We have the sequence S1,S2,E1,S3,S1,E2,E3,E1
+ * S2,E1 - doesn't match the condition, because the sequence S1,S2,E1
+ *         doesn't include CPU3
+ * S3,S1,E2 - this sequence includes all CPUs, so the overlap is S1,E2 -
+ *            it's a part of total blocktime.
+ * S1 - here is last_begin
+ * The legend of the picture is the following:
+ *  * - means blocktime per vCPU
+ *  x - means overlapped blocktime (total blocktime)
+ *
+ * @addr: host virtual address
+ */
+static void mark_postcopy_blocktime_end(uintptr_t addr)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int i, affected_cpu = 0;
+bool vcpu_total_blocktime = false;
+uint32_t read_vcpu_time, low_time_offset;
+
+if (!dc) {
+return;
+}
+
+low_time_offset = get_low_time_offset(dc);
+/* lookup cpu, to clear it,
+ * that algorithm looks stra

[Qemu-devel] [PATCH v1 1/6] migration: introduce postcopy-blocktime capability

2018-03-13 Thread Alexey Perevalov
Right now it can be used on the destination side to
enable vCPU blocktime calculation for postcopy live migration.
vCPU blocktime is the time from when a vCPU thread was put into
interruptible sleep until the memory page was copied and the thread woken.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 migration/migration.c | 9 +
 migration/migration.h | 1 +
 qapi/migration.json   | 6 +-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index e345d0c..0ee9c1f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1527,6 +1527,15 @@ bool migrate_zero_blocks(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
 }
 
+bool migrate_postcopy_blocktime(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
+}
+
 bool migrate_use_compression(void)
 {
 MigrationState *s;
diff --git a/migration/migration.h b/migration/migration.h
index 08c5d2d..aa7a884 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -225,6 +225,7 @@ int migrate_compress_level(void);
 int migrate_compress_threads(void);
 int migrate_decompress_threads(void);
 bool migrate_use_events(void);
+bool migrate_postcopy_blocktime(void);
 
 /* Sending on the return path - generic and then for each message type */
 void migrate_send_rp_shut(MigrationIncomingState *mis,
diff --git a/qapi/migration.json b/qapi/migration.json
index 7f465a1..676ef06 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -354,12 +354,16 @@
 #
 # @x-multifd: Use more than one fd for migration (since 2.11)
 #
+# @postcopy-blocktime: Calculate downtime for postcopy live migration
+# (since 2.12)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
-   'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] }
+   'block', 'return-path', 'pause-before-switchover', 'x-multifd',
+   'postcopy-blocktime' ] }
 
 ##
 # @MigrationCapabilityStatus:
-- 
2.7.4




[Qemu-devel] [PATCH v1 2/6] migration: add postcopy blocktime ctx into MigrationIncomingState

2018-03-13 Thread Alexey Perevalov
This patch adds request to kernel space for UFFD_FEATURE_THREAD_ID, in
case this feature is provided by kernel.

PostcopyBlocktimeContext is encapsulated inside postcopy-ram.c,
since it is a postcopy-only feature. That also bounds the lifetime
of a PostcopyBlocktimeContext instance. Information from the instance
is needed well after the postcopy migration ends, so the instance
lives until QEMU exits, but the parts of it used only during the
calculation (vcpu_addr, page_fault_vcpu_time) are released when
postcopy ends or fails.

To enable postcopy blocktime calculation on the destination, the
proper capability has to be requested (the patch for documentation
is at the tail of the patch set).

As an example, the following command enables that capability, assuming
QEMU was started with the
-chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock
option to control it:

[root@host]# printf "{\"execute\" : \"qmp_capabilities\"}\r\n \
{\"execute\": \"migrate-set-capabilities\" , \"arguments\":   {
\"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\":
true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock

Or just with HMP
(qemu) migrate_set_capability postcopy-blocktime on

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 migration/migration.h|  8 +++
 migration/postcopy-ram.c | 61 
 2 files changed, 69 insertions(+)

diff --git a/migration/migration.h b/migration/migration.h
index aa7a884..3ead619 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -22,6 +22,8 @@
 #include "hw/qdev.h"
 #include "io/channel.h"
 
+struct PostcopyBlocktimeContext;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
 QEMUFile *from_src_file;
@@ -61,6 +63,12 @@ struct MigrationIncomingState {
 /* The coroutine we should enter (back) after failover */
 Coroutine *migration_incoming_co;
 QemuSemaphore colo_incoming_sem;
+
+/*
+ * PostcopyBlocktimeContext to keep information for postcopy
+ * live migration, to calculate vCPU block time
+ * */
+struct PostcopyBlocktimeContext *blocktime_ctx;
 };
 
 MigrationIncomingState *migration_incoming_get_current(void);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 032abfb..9144102 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -61,6 +61,54 @@ struct PostcopyDiscardState {
 #include 
 #include 
 
+typedef struct PostcopyBlocktimeContext {
+/* time when page fault initiated per vCPU */
+uint32_t *page_fault_vcpu_time;
+/* page address per vCPU */
+uintptr_t *vcpu_addr;
+uint32_t total_blocktime;
+/* blocktime per vCPU */
+uint32_t *vcpu_blocktime;
+/* point in time when last page fault was initiated */
+uint32_t last_begin;
+/* number of vCPUs suspended */
+int smp_cpus_down;
+uint64_t start_time;
+
+/*
+ * Handler for exit event, necessary for
+ * releasing whole blocktime_ctx
+ */
+Notifier exit_notifier;
+} PostcopyBlocktimeContext;
+
+static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx)
+{
+g_free(ctx->page_fault_vcpu_time);
+g_free(ctx->vcpu_addr);
+g_free(ctx->vcpu_blocktime);
+g_free(ctx);
+}
+
+static void migration_exit_cb(Notifier *n, void *data)
+{
+PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext,
+ exit_notifier);
+destroy_blocktime_context(ctx);
+}
+
+static struct PostcopyBlocktimeContext *blocktime_context_new(void)
+{
+PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
+ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
+ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
+
+ctx->exit_notifier.notify = migration_exit_cb;
+ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+qemu_add_exit_notifier(&ctx->exit_notifier);
+return ctx;
+}
 
 /**
  * receive_ufd_features: check userfault fd features, to request only supported
@@ -153,6 +201,19 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
 }
 }
 
+#ifdef UFFD_FEATURE_THREAD_ID
+if (migrate_postcopy_blocktime() && mis &&
+UFFD_FEATURE_THREAD_ID & supported_features) {
+/* kernel supports that feature */
+/* don't create blocktime_context if it exists */
+if (!mis->blocktime_ctx) {
+mis->blocktime_ctx = blocktime_context_new();
+}
+
+asked_features |= UFFD_FEATURE_THREAD_ID;
+}
+#endif
+
 /*
  * request features, even if asked_features is 0, due to
  * kernel expects UFFD_API before UFFDIO_REGISTER, per
-- 
2.7.4




[Qemu-devel] [PATCH v1 5/6] migration: add blocktime calculation into migration-test

2018-03-13 Thread Alexey Perevalov
This patch just requests blocktime calculation,
and checks it in the case when the UFFD_FEATURE_THREAD_ID feature is
set on the host.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
 tests/migration-test.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/tests/migration-test.c b/tests/migration-test.c
index 74f9361..259acfb 100644
--- a/tests/migration-test.c
+++ b/tests/migration-test.c
@@ -26,6 +26,7 @@
 const unsigned start_address = 1024 * 1024;
 const unsigned end_address = 100 * 1024 * 1024;
 bool got_stop;
+static bool uffd_feature_thread_id;
 
 #if defined(__linux__)
 #include 
@@ -55,6 +56,7 @@ static bool ufd_version_check(void)
 g_test_message("Skipping test: UFFDIO_API failed");
 return false;
 }
+uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID;
 
 ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
  (__u64)1 << _UFFDIO_UNREGISTER;
@@ -223,6 +225,16 @@ static uint64_t get_migration_pass(QTestState *who)
 return result;
 }
 
+static void read_blocktime(QTestState *who)
+{
+QDict *rsp, *rsp_return;
+
+rsp = wait_command(who, "{ 'execute': 'query-migrate' }");
+rsp_return = qdict_get_qdict(rsp, "return");
+g_assert(qdict_haskey(rsp_return, "postcopy-blocktime"));
+QDECREF(rsp);
+}
+
 static void wait_for_migration_complete(QTestState *who)
 {
 while (true) {
@@ -522,6 +534,7 @@ static void test_migrate(void)
 
 migrate_set_capability(from, "postcopy-ram", "true");
 migrate_set_capability(to, "postcopy-ram", "true");
+migrate_set_capability(to, "postcopy-blocktime", "true");
 
 /* We want to pick a speed slow enough that the test completes
  * quickly, but that it doesn't complete precopy even on a slow
@@ -548,6 +561,9 @@ static void test_migrate(void)
 wait_for_serial("dest_serial");
 wait_for_migration_complete(from);
 
+if (uffd_feature_thread_id) {
+read_blocktime(to);
+}
 g_free(uri);
 
 test_migrate_end(from, to, true);
-- 
2.7.4




[Qemu-devel] [PATCH v1 0/6] postcopy block time calculation + ppc32 build fix

2018-03-13 Thread Alexey Perevalov
This patch set includes the patches that were reverted by commit
ee86981bd due to a build problem on 32-bit PowerPC/ARM architectures.
It also includes the build-fix patch
([PATCH v4] migration: change blocktime type to uint32_t), but that
patch was merged into:
migration: add postcopy blocktime ctx into MigrationIncomingState
migration: calculate vCPU blocktime on dst side
migration: add postcopy total blocktime into query-migrate

based on
commit 12c06d6f967a63515399b9e1f6a40f5ce871a8b7
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging


Alexey Perevalov (6):
  migration: introduce postcopy-blocktime capability
  migration: add postcopy blocktime ctx into MigrationIncomingState
  migration: calculate vCPU blocktime on dst side
  migration: postcopy_blocktime documentation
  migration: add blocktime calculation into migration-test
  migration: add postcopy total blocktime into query-migrate

 docs/devel/migration.rst |  14 +++
 hmp.c                    |  15 +++
 migration/migration.c    |  51 -
 migration/migration.h    |  13 +++
 migration/postcopy-ram.c | 266 ++-
 migration/trace-events   |   6 +-
 qapi/migration.json      |  17 ++-
 tests/migration-test.c   |  16 +++
 8 files changed, 390 insertions(+), 8 deletions(-)

-- 
2.7.4




Re: [Qemu-devel] [PATCH v4] migration: change blocktime type to uint32_t

2018-03-12 Thread Alexey Perevalov

On 03/08/2018 03:59 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

Initially int64_t was used, but on PowerPC architecture,
clang doesn't have atomic_*_8 function, so it produces
link time error.

QEMU is working with time as with 64bit value, but by
fact 32 bit is enough with CLOCK_REALTIME. In this case
blocktime will keep only 1200 hours time interval.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Acked-by: Eric Blake <ebl...@redhat.com>

Hi Alexey,
  So yes, I think that works; can you repost this merged with your full
set of block-time code? Because we had to revert it, we need to put it
all back in again.

Do you mean just to add this patch to the set of reverted patches,
or to merge the code of this patch into "migration: calculate vCPU
blocktime on dst side"?




Thanks,

Dave


---
  hmp.c                    |  4 ++--
  migration/postcopy-ram.c | 52 
  migration/trace-events   |  4 ++--
  qapi/migration.json      |  4 ++--
  4 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/hmp.c b/hmp.c
index be091e0..ec90043 100644
--- a/hmp.c
+++ b/hmp.c
@@ -267,7 +267,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
  }
  
  if (info->has_postcopy_blocktime) {

-monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+monitor_printf(mon, "postcopy blocktime: %u\n",
 info->postcopy_blocktime);
  }
  
@@ -275,7 +275,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)

  Visitor *v;
  char *str;
  v = string_output_visitor_new(false, &str);
-visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
  visit_complete(v, &str);
  monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
  g_free(str);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 05475e0..c46225c 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -63,16 +63,17 @@ struct PostcopyDiscardState {
  
  typedef struct PostcopyBlocktimeContext {

  /* time when page fault initiated per vCPU */
-int64_t *page_fault_vcpu_time;
+uint32_t *page_fault_vcpu_time;
  /* page address per vCPU */
  uintptr_t *vcpu_addr;
-int64_t total_blocktime;
+uint32_t total_blocktime;
  /* blocktime per vCPU */
-int64_t *vcpu_blocktime;
+uint32_t *vcpu_blocktime;
  /* point in time when last page fault was initiated */
-int64_t last_begin;
+uint32_t last_begin;
  /* number of vCPU are suspended */
  int smp_cpus_down;
+uint64_t start_time;
  
  /*

   * Handler for exit event, necessary for
@@ -99,22 +100,23 @@ static void migration_exit_cb(Notifier *n, void *data)
  static struct PostcopyBlocktimeContext *blocktime_context_new(void)
  {
  PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
-ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
  ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
-ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
  
  ctx->exit_notifier.notify = migration_exit_cb;

+ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  qemu_add_exit_notifier(&ctx->exit_notifier);
  return ctx;
  }
  
-static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)

+static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
  {
-int64List *list = NULL, *entry = NULL;
+uint32List *list = NULL, *entry = NULL;
  int i;
  
  for (i = smp_cpus - 1; i >= 0; i--) {

-entry = g_new0(int64List, 1);
+entry = g_new0(uint32List, 1);
  entry->value = ctx->vcpu_blocktime[i];
  entry->next = list;
  list = entry;
@@ -145,7 +147,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info)
  info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
  }
  
-static uint64_t get_postcopy_total_blocktime(void)

+static uint32_t get_postcopy_total_blocktime(void)
  {
  MigrationIncomingState *mis = migration_incoming_get_current();
  PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
@@ -610,6 +612,13 @@ static int get_mem_fault_cpu_index(uint32_t pid)
  return -1;
  }
  
+static uint32_t get_low_time_offset(PostcopyBlocktimeContext *dc)

+{
+int64_t start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+dc->start_time;
+return start_time_offset < 1 ? 1 : start_time_offset & UINT32_MAX;
+}
+
  /*
   * This function is being called when pagefault occurs. It
   * tracks down vCPU blocking time.

[Qemu-devel] [PATCH v4] Fix build on ppc platform in migration/postcopy-ram.c

2018-02-22 Thread Alexey Perevalov
V3->V4
- a common helper was introduced, plus a sanity check for
probable time jumps (comment from David)

V2->V3
- use UINT32_MAX instead of 0x (comment from Philippe)
- use relative time to avoid milliseconds overflow in uint32
(comment from David)


V1->V2
This is the second version:
- comment from David about casting
David was right; I tried to find it in the standard, but it was only
implicitly described for me. The relevant part of the standard:

   1. When a value with integer type is converted to another integer
type other than _Bool, if the value can be represented by the new
type, it is unchanged.
   2. Otherwise, if the new type is unsigned, the value is converted
by repeatedly adding or subtracting one more than the maximum value
that can be represented in the new type until the value is in the
range of the new type.
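A two-line C illustration of rule 2 (the values are hypothetical):

int64_t wide = 4294967301LL;      /* 2^32 + 5, not representable in uint32_t */
uint32_t narrow = (uint32_t)wide; /* reduced modulo 2^32: narrow == 5 */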



Initial message:

There was a problem with 64-bit atomics on ppc in migration/postcopy-ram.c,
reported by Philippe Mathieu-Daudé <f4...@amsat.org>.

Tested in Debian on qemu-system-ppc and in Ubuntu16.04 on i386.

This commit is based on commit afd3397a8149d8b645004e459bf2002d78f5e267
"Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging",
but with all the commits that were reverted in
ee86981bda9ecd40c8daf81b7307b1d2aff68174 applied again.

Alexey Perevalov (1):
  migration: change blocktime type to uint32_t

 hmp.c                    |  4 ++--
 migration/postcopy-ram.c | 52 
 migration/trace-events   |  4 ++--
 qapi/migration.json      |  4 ++--
 4 files changed, 36 insertions(+), 28 deletions(-)

-- 
2.7.4




[Qemu-devel] [PATCH v4] migration: change blocktime type to uint32_t

2018-02-22 Thread Alexey Perevalov
Initially int64_t was used, but on the PowerPC architecture
clang doesn't have the atomic_*_8 functions, so it produces a
link-time error.

QEMU works with time as a 64-bit value, but in fact 32 bits are
enough with CLOCK_REALTIME: at millisecond granularity, 2^32 ms is
about 49.7 days, so blocktime will keep only a ~1200 hour interval.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Acked-by: Eric Blake <ebl...@redhat.com>
---
 hmp.c                    |  4 ++--
 migration/postcopy-ram.c | 52 
 migration/trace-events   |  4 ++--
 qapi/migration.json      |  4 ++--
 4 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/hmp.c b/hmp.c
index be091e0..ec90043 100644
--- a/hmp.c
+++ b/hmp.c
@@ -267,7 +267,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 }
 
 if (info->has_postcopy_blocktime) {
-monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+monitor_printf(mon, "postcopy blocktime: %u\n",
info->postcopy_blocktime);
 }
 
@@ -275,7 +275,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 Visitor *v;
 char *str;
 v = string_output_visitor_new(false, &str);
-visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
 visit_complete(v, &str);
 monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
 g_free(str);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 05475e0..c46225c 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -63,16 +63,17 @@ struct PostcopyDiscardState {
 
 typedef struct PostcopyBlocktimeContext {
 /* time when page fault initiated per vCPU */
-int64_t *page_fault_vcpu_time;
+uint32_t *page_fault_vcpu_time;
 /* page address per vCPU */
 uintptr_t *vcpu_addr;
-int64_t total_blocktime;
+uint32_t total_blocktime;
 /* blocktime per vCPU */
-int64_t *vcpu_blocktime;
+uint32_t *vcpu_blocktime;
 /* point in time when last page fault was initiated */
-int64_t last_begin;
+uint32_t last_begin;
 /* number of vCPU are suspended */
 int smp_cpus_down;
+uint64_t start_time;
 
 /*
  * Handler for exit event, necessary for
@@ -99,22 +100,23 @@ static void migration_exit_cb(Notifier *n, void *data)
 static struct PostcopyBlocktimeContext *blocktime_context_new(void)
 {
 PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
-ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
 ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
-ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
 
 ctx->exit_notifier.notify = migration_exit_cb;
+ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 qemu_add_exit_notifier(&ctx->exit_notifier);
 return ctx;
 }
 
-static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
+static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
 {
-int64List *list = NULL, *entry = NULL;
+uint32List *list = NULL, *entry = NULL;
 int i;
 
 for (i = smp_cpus - 1; i >= 0; i--) {
-entry = g_new0(int64List, 1);
+entry = g_new0(uint32List, 1);
 entry->value = ctx->vcpu_blocktime[i];
 entry->next = list;
 list = entry;
@@ -145,7 +147,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info)
 info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
 }
 
-static uint64_t get_postcopy_total_blocktime(void)
+static uint32_t get_postcopy_total_blocktime(void)
 {
 MigrationIncomingState *mis = migration_incoming_get_current();
 PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
@@ -610,6 +612,13 @@ static int get_mem_fault_cpu_index(uint32_t pid)
 return -1;
 }
 
+static uint32_t get_low_time_offset(PostcopyBlocktimeContext *dc)
+{
+int64_t start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+dc->start_time;
+return start_time_offset < 1 ? 1 : start_time_offset & UINT32_MAX;
+}
+
 /*
  * This function is being called when pagefault occurs. It
  * tracks down vCPU blocking time.
@@ -624,7 +633,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
 int cpu, already_received;
 MigrationIncomingState *mis = migration_incoming_get_current();
 PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
-int64_t now_ms;
+uint32_t low_time_offset;
 
 if (!dc || ptid == 0) {
 return;
@@ -634,14 +643,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
 return;
 }
 
-now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

[Qemu-devel] [PATCH v3] migration: change blocktime type to uint32_t

2018-02-16 Thread Alexey Perevalov
Initially int64_t was used, but on the PowerPC architecture
clang doesn't have the atomic_*_8 functions, so it produces a
link-time error.

QEMU works with time as a 64-bit value, but in fact 32 bits are
enough with CLOCK_REALTIME. In this case blocktime will keep only
a 1200 hour time interval.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Acked-by: Eric Blake <ebl...@redhat.com>
---
 hmp.c                    |  4 ++--
 migration/postcopy-ram.c | 48 +++-
 migration/trace-events   |  4 ++--
 qapi/migration.json      |  4 ++--
 4 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/hmp.c b/hmp.c
index c6bab53..3c376b3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 }
 
 if (info->has_postcopy_blocktime) {
-monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+monitor_printf(mon, "postcopy blocktime: %u\n",
info->postcopy_blocktime);
 }
 
@@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 Visitor *v;
 char *str;
 v = string_output_visitor_new(false, &str);
-visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
 visit_complete(v, &str);
 monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
 g_free(str);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7814da5..6694fd3 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -63,16 +63,17 @@ struct PostcopyDiscardState {
 
 typedef struct PostcopyBlocktimeContext {
 /* time when page fault initiated per vCPU */
-int64_t *page_fault_vcpu_time;
+uint32_t *page_fault_vcpu_time;
 /* page address per vCPU */
 uintptr_t *vcpu_addr;
-int64_t total_blocktime;
+uint32_t total_blocktime;
 /* blocktime per vCPU */
-int64_t *vcpu_blocktime;
+uint32_t *vcpu_blocktime;
 /* point in time when last page fault was initiated */
-int64_t last_begin;
+uint32_t last_begin;
 /* number of vCPU are suspended */
 int smp_cpus_down;
+uint64_t start_time;
 
 /*
  * Handler for exit event, necessary for
@@ -99,22 +100,23 @@ static void migration_exit_cb(Notifier *n, void *data)
 static struct PostcopyBlocktimeContext *blocktime_context_new(void)
 {
 PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
-ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
 ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
-ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
 
 ctx->exit_notifier.notify = migration_exit_cb;
+ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 qemu_add_exit_notifier(&ctx->exit_notifier);
 return ctx;
 }
 
-static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
+static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
 {
-int64List *list = NULL, *entry = NULL;
+uint32List *list = NULL, *entry = NULL;
 int i;
 
 for (i = smp_cpus - 1; i >= 0; i--) {
-entry = g_new0(int64List, 1);
+entry = g_new0(uint32List, 1);
 entry->value = ctx->vcpu_blocktime[i];
 entry->next = list;
 list = entry;
@@ -145,7 +147,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info)
 info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
 }
 
-static uint64_t get_postcopy_total_blocktime(void)
+static uint32_t get_postcopy_total_blocktime(void)
 {
 MigrationIncomingState *mis = migration_incoming_get_current();
 PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
@@ -633,7 +635,8 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
 int cpu, already_received;
 MigrationIncomingState *mis = migration_incoming_get_current();
 PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
-int64_t now_ms;
+int64_t start_time_offset;
+uint32_t low_time_offset;
 
 if (!dc || ptid == 0) {
 return;
@@ -643,14 +646,15 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
 return;
 }
 
-now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - dc->start_time;
+low_time_offset = start_time_offset & UINT32_MAX;
 if (dc->vcpu_addr[cpu] == 0) {
 atomic_inc(&dc->smp_cpus_down);
 }
 
-atomic_xchg__nocheck(&dc->last_begin, now_ms);
-atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
-atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+atomic_xchg(&dc->last_begin, low_time_offset);
+atomic_xchg(&dc->page_fault_vcpu_time[cpu], low_time_offset);
+atomic_xchg(&dc->vcpu_addr[cpu], addr);

[Qemu-devel] [PATCH v3] Fix build on ppc platform in migration/postcopy-ram.c

2018-02-16 Thread Alexey Perevalov
V2->V3
- use UINT32_MAX instead of 0xFFFFFFFF (comment from Philippe)
- use relative time to avoid milliseconds overflow in uint32
(comment from David)


V1->V2
This is a second version:
- comment from David about casting
David was right; I tried to find it in the standard, though it was described
only implicitly there. The relevant part of the standard:

   1. When a value with integer type is converted to another integer
type other than _Bool, if the value can be represented by the new
type, it is unchanged.
   2. Otherwise, if the new type is unsigned, the value is converted
by repeatedly adding or subtracting one more than the maximum value
that can be represented in the new type until the value is in the
range of the new type.
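
As a small self-contained illustration of rule 2 (my example, not part of the
patch): converting a 64-bit millisecond count to uint32_t reduces it modulo
2^32, i.e. it keeps the low 32 bits, which is exactly what the cast relies on:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int64_t now_ms = INT64_C(0x123456789A);   /* arbitrary positive value */
        uint32_t low = (uint32_t)now_ms;          /* rule 2: reduced modulo 2^32 */
        assert(low == (uint32_t)(now_ms & UINT32_MAX));   /* the low 32 bits */
        return 0;
    }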



Initial message:

There was a problem with 64-bit atomics on ppc in migration/postcopy-ram.c, reported by
Philippe Mathieu-Daudé <f4...@amsat.org>.

Tested in Debian on qemu-system-ppc and in Ubuntu 16.04 on i386.

This commit is based on commit ee264eb32c14f076c964fc34ee66f6f95cce2080
"Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.12-20180121' into 
staging"

Alexey Perevalov (1):
  migration: change blocktime type to uint32_t

 hmp.c|  4 ++--
 migration/postcopy-ram.c | 48 +++-
 migration/trace-events   |  4 ++--
 qapi/migration.json  |  4 ++--
 4 files changed, 33 insertions(+), 27 deletions(-)

-- 
2.7.4




Re: [Qemu-devel] [PATCH v1] migration: change blocktime type to uint32_t

2018-01-28 Thread Alexey Perevalov

On 01/25/2018 11:02 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

Initially int64_t was used, but on the PowerPC architecture clang
doesn't provide the atomic_*_8 functions, so it produces a link-time
error.

QEMU works with time as a 64-bit value, but in fact 32 bits are enough
with CLOCK_REALTIME; in that case blocktime can only represent a time
interval of about 1200 hours.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
  hmp.c|  4 ++--
  migration/postcopy-ram.c | 47 ++-
  migration/trace-events   |  4 ++--
  qapi/migration.json  |  4 ++--
  4 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/hmp.c b/hmp.c
index c6bab53..3c376b3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
  }
  
  if (info->has_postcopy_blocktime) {

-monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+monitor_printf(mon, "postcopy blocktime: %u\n",
 info->postcopy_blocktime);
  }
  
@@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)

  Visitor *v;
  char *str;
  v = string_output_visitor_new(false, &str);
-visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
  visit_complete(v, &str);
  monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
  g_free(str);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7814da5..ce91de8 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -63,14 +63,14 @@ struct PostcopyDiscardState {
  
  typedef struct PostcopyBlocktimeContext {

  /* time when page fault initiated per vCPU */
-int64_t *page_fault_vcpu_time;
+uint32_t *page_fault_vcpu_time;
  /* page address per vCPU */
  uintptr_t *vcpu_addr;
-int64_t total_blocktime;
+uint32_t total_blocktime;
  /* blocktime per vCPU */
-int64_t *vcpu_blocktime;
+uint32_t *vcpu_blocktime;
  /* point in time when last page fault was initiated */
-int64_t last_begin;
+uint32_t last_begin;
  /* number of vCPU are suspended */
  int smp_cpus_down;
  
@@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data)

  static struct PostcopyBlocktimeContext *blocktime_context_new(void)
  {
  PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
-ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
  ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
-ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
  
  ctx->exit_notifier.notify = migration_exit_cb;

  qemu_add_exit_notifier(&ctx->exit_notifier);
  return ctx;
  }
  
-static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)

+static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
  {
-int64List *list = NULL, *entry = NULL;
+uint32List *list = NULL, *entry = NULL;
  int i;
  
  for (i = smp_cpus - 1; i >= 0; i--) {

-entry = g_new0(int64List, 1);
+entry = g_new0(uint32List, 1);
  entry->value = ctx->vcpu_blocktime[i];
  entry->next = list;
  list = entry;
@@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo 
*info)
  info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
  }
  
-static uint64_t get_postcopy_total_blocktime(void)

+static uint32_t get_postcopy_total_blocktime(void)
  {
  MigrationIncomingState *mis = migration_incoming_get_current();
  PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
@@ -619,6 +619,16 @@ static int get_mem_fault_cpu_index(uint32_t pid)
  return -1;
  }
  
+static uint32_t get_least_significant_part(int64_t value)

+{
+unsigned char *t = (unsigned char *)&value;
+#if defined(HOST_WORDS_BIGENDIAN)
+return t[4] << 24 | t[5] << 16 | t[6] << 8 | t[7] << 0;
+#else
+return t[0] << 0 | t[1] << 8 | t[2] << 16 | t[3] << 24;
+#endif /* HOST_WORDS_BIGENDIAN */
+}

This doesn't feel right.
Firstly, we're doing a check for the magic value of read_vcpu_time==0 in
mark_postcopy_blocktime_end - so we have to be careful not to hit it.
Just masking the bottom 32 bits of time means we've got a (rare) chance
of hitting that; but we've got a much less rare chance of hitting
the case where one of the measurements happens after the roll-over
of the bottom 32 bits.
If you stored a time at the start of the postcopy and just
subtracted that from 'now' you're probably OK though.
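
A minimal sketch of that suggestion (hypothetical names, not the actual
patch): record a 64-bit start time once, store only 32-bit offsets from it,
and let unsigned subtraction absorb a roll-over of the low 32 bits:

    #include <assert.h>
    #include <stdint.h>

    static int64_t start_time;   /* recorded once, when postcopy starts */

    static uint32_t low_offset(int64_t now_ms)
    {
        return (uint32_t)((now_ms - start_time) & UINT32_MAX);
    }

    int main(void)
    {
        start_time = 1000;
        /* one measurement just before the 32-bit roll-over, one just after */
        uint32_t begin = low_offset(start_time + UINT32_MAX - 5);
        uint32_t end   = low_offset(start_time + UINT32_MAX + 5);
        assert((uint32_t)(end - begin) == 10);   /* modular arithmetic still works */
        return 0;
    }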

Here it's not so clear for me.
I thought we get some "now" and it doesn't matter how,
anding or shifting

Re: [Qemu-devel] [PATCH v2] migration: change blocktime type to uint32_t

2018-01-28 Thread Alexey Perevalov

On 01/26/2018 09:14 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

Initially int64_t was used, but on the PowerPC architecture clang
doesn't provide the atomic_*_8 functions, so it produces a link-time
error.

QEMU works with time as a 64-bit value, but in fact 32 bits are enough
with CLOCK_REALTIME; in that case blocktime can only represent a time
interval of about 1200 hours.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Acked-by: Eric Blake <ebl...@redhat.com>
---
  hmp.c|  4 ++--
  migration/postcopy-ram.c | 37 -
  migration/trace-events   |  4 ++--
  qapi/migration.json  |  4 ++--
  4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/hmp.c b/hmp.c
index c6bab53..3c376b3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
  }
  
  if (info->has_postcopy_blocktime) {

-monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+monitor_printf(mon, "postcopy blocktime: %u\n",
 info->postcopy_blocktime);
  }
  
@@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)

  Visitor *v;
  char *str;
  v = string_output_visitor_new(false, &str);
-visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
  visit_complete(v, &str);
  monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
  g_free(str);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7814da5..bd08c24 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -63,14 +63,14 @@ struct PostcopyDiscardState {
  
  typedef struct PostcopyBlocktimeContext {

  /* time when page fault initiated per vCPU */
-int64_t *page_fault_vcpu_time;
+uint32_t *page_fault_vcpu_time;
  /* page address per vCPU */
  uintptr_t *vcpu_addr;
-int64_t total_blocktime;
+uint32_t total_blocktime;
  /* blocktime per vCPU */
-int64_t *vcpu_blocktime;
+uint32_t *vcpu_blocktime;
  /* point in time when last page fault was initiated */
-int64_t last_begin;
+uint32_t last_begin;
  /* number of vCPU are suspended */
  int smp_cpus_down;
  
@@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data)

  static struct PostcopyBlocktimeContext *blocktime_context_new(void)
  {
  PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
-ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
  ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
-ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
  
  ctx->exit_notifier.notify = migration_exit_cb;

  qemu_add_exit_notifier(&ctx->exit_notifier);
  return ctx;
  }
  
-static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)

+static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
  {
-int64List *list = NULL, *entry = NULL;
+uint32List *list = NULL, *entry = NULL;
  int i;
  
  for (i = smp_cpus - 1; i >= 0; i--) {

-entry = g_new0(int64List, 1);
+entry = g_new0(uint32List, 1);
  entry->value = ctx->vcpu_blocktime[i];
  entry->next = list;
  list = entry;
@@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo 
*info)
  info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
  }
  
-static uint64_t get_postcopy_total_blocktime(void)

+static uint32_t get_postcopy_total_blocktime(void)
  {
  MigrationIncomingState *mis = migration_incoming_get_current();
  PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
@@ -634,6 +634,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, 
uint32_t ptid,
  MigrationIncomingState *mis = migration_incoming_get_current();
  PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
  int64_t now_ms;
+uint32_t least_now;
  
  if (!dc || ptid == 0) {

  return;
@@ -644,13 +645,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, 
uint32_t ptid,
  }
  
  now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

+least_now = (uint32_t)now_ms;
  if (dc->vcpu_addr[cpu] == 0) {
  atomic_inc(&dc->smp_cpus_down);
  }
  
-atomic_xchg__nocheck(&dc->last_begin, now_ms);

-atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
-atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+atomic_xchg(&dc->last_begin, least_now);
+atomic_xchg(&dc->page_fault_vcpu_time[cpu], least_now);
+atomic_xchg(&dc->vcpu_addr[cpu], addr);
  
  /* check it here, not at the beginning of the function,

   * due to, ch

[Qemu-devel] [PATCH v2] Fix build on ppc platform in migration/postcopy-ram.c

2018-01-26 Thread Alexey Perevalov
This is a second version:
- comment from David about casting
David was right; I tried to find it in the standard, though it was described
only implicitly there. The relevant part of the standard:

   1. When a value with integer type is converted to another integer
type other than _Bool, if the value can be represented by the new
type, it is unchanged.
   2. Otherwise, if the new type is unsigned, the value is converted
by repeatedly adding or subtracting one more than the maximum value
that can be represented in the new type until the value is in the
range of the new type.



Initial message:

There was a problem with 64-bit atomics on ppc in migration/postcopy-ram.c, reported by
Philippe Mathieu-Daudé <f4...@amsat.org>.

Tested in Debian on qemu-system-ppc and in Ubuntu 16.04 on i386.

This commit is based on commit ee264eb32c14f076c964fc34ee66f6f95cce2080
"Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.12-20180121' into 
staging"

Alexey Perevalov (1):
  migration: change blocktime type to uint32_t

 hmp.c|  4 ++--
 migration/postcopy-ram.c | 37 -
 migration/trace-events   |  4 ++--
 qapi/migration.json  |  4 ++--
 4 files changed, 26 insertions(+), 23 deletions(-)

-- 
2.7.4




Re: [Qemu-devel] [PATCH v2] migration: change blocktime type to uint32_t

2018-01-26 Thread Alexey Perevalov

On 01/26/2018 07:13 PM, Philippe Mathieu-Daudé wrote:

Hi Alexey,

On 01/26/2018 01:05 PM, Alexey Perevalov wrote:

Initially int64_t was used, but on the PowerPC architecture clang
doesn't provide the atomic_*_8 functions, so it produces a link-time
error.

QEMU works with time as a 64-bit value, but in fact 32 bits are enough
with CLOCK_REALTIME; in that case blocktime can only represent a time
interval of about 1200 hours.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Acked-by: Eric Blake <ebl...@redhat.com>
---
  hmp.c|  4 ++--
  migration/postcopy-ram.c | 37 -
  migration/trace-events   |  4 ++--
  qapi/migration.json  |  4 ++--
  4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/hmp.c b/hmp.c
index c6bab53..3c376b3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
  }
  
  if (info->has_postcopy_blocktime) {

-monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+monitor_printf(mon, "postcopy blocktime: %u\n",
 info->postcopy_blocktime);
  }
  
@@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)

  Visitor *v;
  char *str;
  v = string_output_visitor_new(false, &str);
-visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
  visit_complete(v, &str);
  monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
  g_free(str);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7814da5..bd08c24 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -63,14 +63,14 @@ struct PostcopyDiscardState {
  
  typedef struct PostcopyBlocktimeContext {

  /* time when page fault initiated per vCPU */
-int64_t *page_fault_vcpu_time;
+uint32_t *page_fault_vcpu_time;
  /* page address per vCPU */
  uintptr_t *vcpu_addr;
-int64_t total_blocktime;
+uint32_t total_blocktime;
  /* blocktime per vCPU */
-int64_t *vcpu_blocktime;
+uint32_t *vcpu_blocktime;
  /* point in time when last page fault was initiated */
-int64_t last_begin;
+uint32_t last_begin;
  /* number of vCPU are suspended */
  int smp_cpus_down;
  
@@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data)

  static struct PostcopyBlocktimeContext *blocktime_context_new(void)
  {
  PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
-ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
  ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
-ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
  
  ctx->exit_notifier.notify = migration_exit_cb;

  qemu_add_exit_notifier(&ctx->exit_notifier);
  return ctx;
  }
  
-static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)

+static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
  {
-int64List *list = NULL, *entry = NULL;
+uint32List *list = NULL, *entry = NULL;
  int i;
  
  for (i = smp_cpus - 1; i >= 0; i--) {

-entry = g_new0(int64List, 1);
+entry = g_new0(uint32List, 1);
  entry->value = ctx->vcpu_blocktime[i];
  entry->next = list;
  list = entry;
@@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo 
*info)
  info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
  }
  
-static uint64_t get_postcopy_total_blocktime(void)

+static uint32_t get_postcopy_total_blocktime(void)
  {
  MigrationIncomingState *mis = migration_incoming_get_current();
  PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
@@ -634,6 +634,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, 
uint32_t ptid,
  MigrationIncomingState *mis = migration_incoming_get_current();
  PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
  int64_t now_ms;
+uint32_t least_now;
  
  if (!dc || ptid == 0) {

  return;
@@ -644,13 +645,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, 
uint32_t ptid,
  }
  
  now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

+least_now = (uint32_t)now_ms;
  if (dc->vcpu_addr[cpu] == 0) {
  atomic_inc(&dc->smp_cpus_down);
  }
  
-atomic_xchg__nocheck(&dc->last_begin, now_ms);

-atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
-atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+atomic_xchg(&dc->last_begin, least_now);
+atomic_xchg(&dc->page_fault_vcpu_time[cpu], least_now);
+atomic_xchg(&dc->vcpu_addr[cpu], addr);
  
  /* check it here, not at the begining of the function,

   * due to, ch

[Qemu-devel] [PATCH v2] migration: change blocktime type to uint32_t

2018-01-26 Thread Alexey Perevalov
Initially int64_t was used, but on the PowerPC architecture clang
doesn't provide the atomic_*_8 functions, so it produces a link-time
error.

QEMU works with time as a 64-bit value, but in fact 32 bits are enough
with CLOCK_REALTIME; in that case blocktime can only represent a time
interval of about 1200 hours.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Acked-by: Eric Blake <ebl...@redhat.com>
---
 hmp.c|  4 ++--
 migration/postcopy-ram.c | 37 -
 migration/trace-events   |  4 ++--
 qapi/migration.json  |  4 ++--
 4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/hmp.c b/hmp.c
index c6bab53..3c376b3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 }
 
 if (info->has_postcopy_blocktime) {
-monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+monitor_printf(mon, "postcopy blocktime: %u\n",
info->postcopy_blocktime);
 }
 
@@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 Visitor *v;
 char *str;
 v = string_output_visitor_new(false, &str);
-visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
 visit_complete(v, &str);
 monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
 g_free(str);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7814da5..bd08c24 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -63,14 +63,14 @@ struct PostcopyDiscardState {
 
 typedef struct PostcopyBlocktimeContext {
 /* time when page fault initiated per vCPU */
-int64_t *page_fault_vcpu_time;
+uint32_t *page_fault_vcpu_time;
 /* page address per vCPU */
 uintptr_t *vcpu_addr;
-int64_t total_blocktime;
+uint32_t total_blocktime;
 /* blocktime per vCPU */
-int64_t *vcpu_blocktime;
+uint32_t *vcpu_blocktime;
 /* point in time when last page fault was initiated */
-int64_t last_begin;
+uint32_t last_begin;
 /* number of vCPU are suspended */
 int smp_cpus_down;
 
@@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data)
 static struct PostcopyBlocktimeContext *blocktime_context_new(void)
 {
 PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
-ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
 ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
-ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
 
 ctx->exit_notifier.notify = migration_exit_cb;
 qemu_add_exit_notifier(&ctx->exit_notifier);
 return ctx;
 }
 
-static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
+static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
 {
-int64List *list = NULL, *entry = NULL;
+uint32List *list = NULL, *entry = NULL;
 int i;
 
 for (i = smp_cpus - 1; i >= 0; i--) {
-entry = g_new0(int64List, 1);
+entry = g_new0(uint32List, 1);
 entry->value = ctx->vcpu_blocktime[i];
 entry->next = list;
 list = entry;
@@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo 
*info)
 info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
 }
 
-static uint64_t get_postcopy_total_blocktime(void)
+static uint32_t get_postcopy_total_blocktime(void)
 {
 MigrationIncomingState *mis = migration_incoming_get_current();
 PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
@@ -634,6 +634,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, 
uint32_t ptid,
 MigrationIncomingState *mis = migration_incoming_get_current();
 PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
 int64_t now_ms;
+uint32_t least_now;
 
 if (!dc || ptid == 0) {
 return;
@@ -644,13 +645,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, 
uint32_t ptid,
 }
 
 now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+least_now = (uint32_t)now_ms;
 if (dc->vcpu_addr[cpu] == 0) {
 atomic_inc(&dc->smp_cpus_down);
 }
 
-atomic_xchg__nocheck(&dc->last_begin, now_ms);
-atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
-atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+atomic_xchg(&dc->last_begin, least_now);
+atomic_xchg(&dc->page_fault_vcpu_time[cpu], least_now);
+atomic_xchg(&dc->vcpu_addr[cpu], addr);
 
 /* check it here, not at the beginning of the function,
  * due to, check could occur earlier than bitmap_set in
@@ -699,20 +701,21 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
 int i, affected_cpu = 0;
 int64_t now_ms;
 bool vcpu_total_blocktime = false;

[Qemu-devel] [PATCH v1] migration: change blocktime type to uint32_t

2018-01-25 Thread Alexey Perevalov
Initially int64_t was used, but on the PowerPC architecture clang
doesn't provide the atomic_*_8 functions, so it produces a link-time
error.

QEMU works with time as a 64-bit value, but in fact 32 bits are enough
with CLOCK_REALTIME; in that case blocktime can only represent a time
interval of about 1200 hours.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 hmp.c|  4 ++--
 migration/postcopy-ram.c | 47 ++-
 migration/trace-events   |  4 ++--
 qapi/migration.json  |  4 ++--
 4 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/hmp.c b/hmp.c
index c6bab53..3c376b3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 }
 
 if (info->has_postcopy_blocktime) {
-monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+monitor_printf(mon, "postcopy blocktime: %u\n",
info->postcopy_blocktime);
 }
 
@@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 Visitor *v;
 char *str;
 v = string_output_visitor_new(false, &str);
-visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
 visit_complete(v, &str);
 monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
 g_free(str);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7814da5..ce91de8 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -63,14 +63,14 @@ struct PostcopyDiscardState {
 
 typedef struct PostcopyBlocktimeContext {
 /* time when page fault initiated per vCPU */
-int64_t *page_fault_vcpu_time;
+uint32_t *page_fault_vcpu_time;
 /* page address per vCPU */
 uintptr_t *vcpu_addr;
-int64_t total_blocktime;
+uint32_t total_blocktime;
 /* blocktime per vCPU */
-int64_t *vcpu_blocktime;
+uint32_t *vcpu_blocktime;
 /* point in time when last page fault was initiated */
-int64_t last_begin;
+uint32_t last_begin;
 /* number of vCPU are suspended */
 int smp_cpus_down;
 
@@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data)
 static struct PostcopyBlocktimeContext *blocktime_context_new(void)
 {
 PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
-ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
 ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
-ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
 
 ctx->exit_notifier.notify = migration_exit_cb;
 qemu_add_exit_notifier(&ctx->exit_notifier);
 return ctx;
 }
 
-static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
+static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
 {
-int64List *list = NULL, *entry = NULL;
+uint32List *list = NULL, *entry = NULL;
 int i;
 
 for (i = smp_cpus - 1; i >= 0; i--) {
-entry = g_new0(int64List, 1);
+entry = g_new0(uint32List, 1);
 entry->value = ctx->vcpu_blocktime[i];
 entry->next = list;
 list = entry;
@@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo 
*info)
 info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
 }
 
-static uint64_t get_postcopy_total_blocktime(void)
+static uint32_t get_postcopy_total_blocktime(void)
 {
 MigrationIncomingState *mis = migration_incoming_get_current();
 PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
@@ -619,6 +619,16 @@ static int get_mem_fault_cpu_index(uint32_t pid)
 return -1;
 }
 
+static uint32_t get_least_significant_part(int64_t value)
+{
+unsigned char *t = (unsigned char *)&value;
+#if defined(HOST_WORDS_BIGENDIAN)
+return t[4] << 24 | t[5] << 16 | t[6] << 8 | t[7] << 0;
+#else
+return t[0] << 0 | t[1] << 8 | t[2] << 16 | t[3] << 24;
+#endif /* HOST_WORDS_BIGENDIAN */
+}
+
 /*
  * This function is being called when pagefault occurs. It
  * tracks down vCPU blocking time.
@@ -634,6 +644,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, 
uint32_t ptid,
 MigrationIncomingState *mis = migration_incoming_get_current();
 PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
 int64_t now_ms;
+uint32_t least_now;
 
 if (!dc || ptid == 0) {
 return;
@@ -644,13 +655,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, 
uint32_t ptid,
 }
 
 now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+least_now = get_least_significant_part(now_ms);
 if (dc->vcpu_addr[cpu] == 0) {
 atomic_inc(&dc->smp_cpus_down);
 }
 
-atomic_xchg__nocheck(&dc->last_begin, now_ms);
-atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);

[Qemu-devel] [PATCH v1] Fix build on ppc platform in migration/postcopy-ram.c

2018-01-25 Thread Alexey Perevalov
There was a problem with 64-bit atomics on ppc in migration/postcopy-ram.c, reported by
Philippe Mathieu-Daudé <f4...@amsat.org>.


I didn't check on ppc in docker, because the Debian installation inside docker failed,
but I have my own Debian on qemu-system-ppc, where the build is still running.
It was also tested on Ubuntu 16.04 on i386.

This commit is based on commit ee264eb32c14f076c964fc34ee66f6f95cce2080
"Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.12-20180121' into 
staging"

Alexey Perevalov (1):
  migration: change blocktime type to uint32_t

 hmp.c|  4 ++--
 migration/postcopy-ram.c | 47 ++-
 migration/trace-events   |  4 ++--
 qapi/migration.json  |  4 ++--
 4 files changed, 36 insertions(+), 23 deletions(-)

-- 
2.7.4




Re: [Qemu-devel] [PULL 00/27] Migration pull

2018-01-22 Thread Alexey Perevalov

On 01/22/2018 07:26 PM, Peter Maydell wrote:

On 22 January 2018 at 16:25, Alexey Perevalov <a.pereva...@samsung.com> wrote:

I want to keep 64bit atomic operations in migration.

Sorry, you can't -- some 32 bit CPUs simply do not provide these
operations. You need to rework your design to not require this.

I would like to ask David:
do you think it is acceptable to use just one half of now_ms (int64_t)? One
half of a 64-bit value can represent about 1200 hours, and that is probably
enough to keep a time difference.
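
For reference, that estimate can be checked directly (my arithmetic, nothing
more): 2^32 milliseconds is about 1193 hours, i.e. the "1200 hours" mentioned
above:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t span_ms = (uint64_t)UINT32_MAX + 1;   /* 2^32 milliseconds */
        /* 1000 ms * 60 s * 60 min = one hour; prints 1193 */
        printf("%llu hours\n", (unsigned long long)(span_ms / (1000 * 60 * 60)));
        return 0;
    }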



thanks
-- PMM





--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [PULL 00/27] Migration pull

2018-01-22 Thread Alexey Perevalov

On 01/22/2018 01:03 PM, Peter Maydell wrote:

On 20 January 2018 at 23:36, Juan Quintela <quint...@redhat.com> wrote:

Peter Maydell <peter.mayd...@linaro.org> wrote:

On 19 January 2018 at 16:43, Alexey Perevalov <a.pereva...@samsung.com> wrote:

As I remember, I tested the build in QEMU's docker build system,
but now I checked it on i386 Ubuntu, and indeed the linker reports unresolved
atomic symbols. Next week, I'll have time to investigate it more deeply.

This sounds like exactly the problem I pointed out in a previous
round of this patchset :-(

https://lists.gnu.org/archive/html/qemu-devel/2018-01/msg02103.html

Ignoring comments and sending patches anyway makes me grumpy,
especially when the result is exactly "fails obscurely on
some architectures only"...

It compiles for me.  F25 i686 gcc.  I did change it to use intptr_t
instead of uint64_t.  So, I don't know what is going on here.

Did you change it to not use the 'nocheck' versions of the macros?
The code in master uses 'nocheck' which has exactly the effect
of masking this bug on i686...


So, I can agree that we have to fix anything that don't work, but I
can't agree that I didn't care about comments, at least I tried to fix
the problems you pointed me to.

I said the code should probably not use the nocheck macros. The
code in master is still using those macros, wrongly, which is why
this problem shows only on ppc32 and not all 32-bit hosts.

clang doesn't have the atomic_*_8 function set on 32-bit platforms,
but gcc does.

I also checked on Ubuntu Server 12.04; it looks like there is no
__ATOMIC_RELAXED in either the gcc or the clang toolchain, so the
following code
atomic_xchg__nocheck(, b64);
expands to
(({ asm volatile("" ::: "memory"); (void)0; }),
__sync_lock_test_and_set(, b64));
and we have a problem in neither gcc nor clang. Maybe it's not so
effective from a performance point of view.

I like the glib approach:
#if defined(__ATOMIC_SEQ_CST) && !defined(__clang__)
there it is __ATOMIC_SEQ_CST instead of __ATOMIC_RELAXED as in QEMU, but that
doesn't matter.
But clang does have the atomic_*_[1, 2, 4] functions, so it's not reasonable
to avoid using clang for all cases.

I want to keep 64-bit atomic operations in migration.
Maybe add an additional check into atomic.h for clang and a 64-bit operand,
and in that case use
(({ asm volatile("" ::: "memory"); (void)0; }),
__sync_lock_test_and_set(, b64));

?
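
A rough sketch of that idea (the guard and names here are mine, not a
concrete atomic.h patch):

    #include <stdint.h>

    static uint64_t slot;

    static uint64_t xchg_u64(uint64_t val)
    {
    #if defined(__ATOMIC_RELAXED) && !(defined(__clang__) && !defined(__LP64__))
        /* __atomic builtins; the 8-byte forms may need libatomic on 32-bit hosts */
        return __atomic_exchange_n(&slot, val, __ATOMIC_SEQ_CST);
    #else
        /* older __sync builtin: acquire semantics only, so add a full barrier */
        __sync_synchronize();
        return __sync_lock_test_and_set(&slot, val);
    #endif
    }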




thanks
-- PMM





--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [PULL 00/27] Migration pull

2018-01-19 Thread Alexey Perevalov

On 01/19/2018 07:27 PM, Philippe Mathieu-Daudé wrote:

On 01/15/2018 01:14 PM, Peter Maydell wrote:

On 15 January 2018 at 11:52, Juan Quintela <quint...@redhat.com> wrote:

Hi
- rebase on top of lastest
- fix compilation on 32bit machines
- add Peter Xu cleanups

Please, apply.

The following changes since commit fd06527b80c88c8dde1b35fdc692685b68d2fd93:

   Merge remote-tracking branch 'remotes/thibault/tags/samuel-thibault' into 
staging (2018-01-15 10:39:29 +)

are available in the Git repository at:

   git://github.com/juanquintela/qemu.git tags/migration/20180115

for you to fetch changes up to 816306826a45f4d15352e32d157172af3a35899f:

   migration: remove notify in fd_error (2018-01-15 12:48:13 +0100)


migration/next for 20180115

Applied, thanks.

We have armel/armhf/powerpc hosts failing since this pull due to commit
3be98be4e9f.

Those targets are currently tested on Shippable CI; with an IRC bot added we
could eventually get notified about such failures.

I know companies using QEMU system emulation on embedded armel/armhf hosts; I
don't know about ppc32 hosts.
It is however unlikely that the migration features are used there.

If 64-bit atomic ops are required for migration (performance/security) but
not on 32-bit systems, one way to fix it could be to make the migration
code optional... so we can disable it on 32-bit hosts.

Regards,

Phil.


Thank you Phil for the report,

I have a release this week and it's not yet over.

As I remember, I tested the build in QEMU's docker build system,
but now I checked it on i386 Ubuntu, and indeed the linker reports unresolved
atomic symbols. Next week, I'll have time to investigate it more deeply.


--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [PULL 09/27] migration: calculate vCPU blocktime on dst side

2018-01-16 Thread Alexey Perevalov

On 01/16/2018 08:43 PM, Dr. David Alan Gilbert wrote:

* Max Reitz (mre...@redhat.com) wrote:

On 2018-01-15 12:52, Juan Quintela wrote:

From: Alexey Perevalov <a.pereva...@samsung.com>

This patch provides blocktime calculation per vCPU,
as a summary and as an overlapped value for all vCPUs.

This approach was suggested by Peter Xu as an improvement over the previous
approach, where QEMU kept a tree with the faulted page address and a cpus
bitmask in it. Now QEMU keeps an array with the faulted page address as value
and the vCPU as index. That helps to find the proper vCPU at UFFD_COPY time.
It also keeps a list of blocktimes per vCPU (which can be traced with
page_fault_addr).

Blocktime will not be calculated if the postcopy_blocktime field of
MigrationIncomingState wasn't initialized.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Juan Quintela <quint...@redhat.com>
---
  migration/postcopy-ram.c | 143 ++-
  migration/trace-events   |   5 +-
  2 files changed, 146 insertions(+), 2 deletions(-)

For me, this breaks compilation with clang -m32:

   LINKx86_64-softmmu/qemu-system-x86_64
../migration/postcopy-ram.o: In function `mark_postcopy_blocktime_begin':
/home/maxx/projects/qemu/migration/postcopy-ram.c:599: undefined
reference to `__atomic_exchange_8'
/home/maxx/projects/qemu/migration/postcopy-ram.c:600: undefined
reference to `__atomic_exchange_8'
/home/maxx/projects/qemu/migration/postcopy-ram.c:609: undefined
reference to `__atomic_exchange_8'
../migration/postcopy-ram.o: In function `mark_postcopy_blocktime_end':
/home/maxx/projects/qemu/migration/postcopy-ram.c:665: undefined
reference to `__atomic_fetch_add_8'
/home/maxx/projects/qemu/migration/postcopy-ram.c:686: undefined
reference to `__atomic_fetch_add_8'

Am I doing something wrong?

Hmm I also see that with clang on 32bit (gcc is fine);
the problem is the postcopy blocktime stuff is doing some 64bit
atomics, which you can never be sure 32bit will support.

Dave

I didn't check the clang build; ok, I'll check it.




Max




--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK





--
Best regards,
Alexey Perevalov,
phone: +7 (495) 797 25 00 ext 3969
e-mail: a.pereva...@samsung.com

Samsung R&D Institute Rus
12 Dvintsev street, building 1
127018, Moscow, Russian Federation



Re: [Qemu-devel] [PULL 00/14] Migration pull request

2018-01-09 Thread Alexey Perevalov

On 01/05/2018 12:59 PM, Juan Quintela wrote:

Eric Blake <ebl...@redhat.com> wrote:

On 01/03/2018 03:38 AM, Juan Quintela wrote:

Hi

This are the changes for migration that are already reviewed.

Please, apply.

Alexey Perevalov (6):
   migration: introduce postcopy-blocktime capability
   migration: add postcopy blocktime ctx into MigrationIncomingState
   migration: calculate vCPU blocktime on dst side
   migration: postcopy_blocktime documentation
   migration: add blocktime calculation into migration-test
   migration: add postcopy total blocktime into query-migrate

I had unanswered questions about these patches in the v12 series, where
I'm not sure if the interface is still quite right.

To be fair, I had already integrated the patches before I saw your questions.


We're still early
enough that we could adjust the interface after the fact depending on
how the questions are answered;

I think this is the best approach, so far I can see two questions:

- do we want to make it conditional?  it requires some locking, but I
   haven't measured it to see how slow/fast it is.

- the other was documentation.

I will like Alexey to answer.  Depending of how slow it is, I can agree
to make it non-optional.

Ok, I'll provide logs with traces, and maybe a gprof result, today
or tomorrow.



but we're also early enough that it may
be smarter to get the interface right before including it in a pull
request.  I'll leave it to Peter and Juan to sort out whether this means
an updated pull request is needed, or to take this as-is.

Thanks, Juan.





--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [PATCH v12 6/6] migration: add postcopy total blocktime into query-migrate

2018-01-05 Thread Alexey Perevalov

On 01/03/2018 12:26 AM, Eric Blake wrote:

On 10/30/2017 08:16 AM, Alexey Perevalov wrote:

Postcopy total blocktime is available on the destination side only,
but query-migrate was possible only for the source. This patch
adds the ability to call query-migrate on the destination.
To be able to see postcopy blocktime, the postcopy-blocktime
capability needs to be requested.

Why not display the stats unconditionally when they are available,
instead of having to set a capability knob to request them?

That knob is necessary to avoid a regression when this information
is not needed; we decided that during the long discussion on a previous
version of the patch set - it's not always necessary.
But if the user requested blocktime and the host can't calculate it,
e.g. because the appropriate feature isn't supported in the host kernel,
yes, the value will be 0.




The query-migrate command will show the following sample result:
{"return": {
 "postcopy-vcpu-blocktime": [115, 100],
 "status": "completed",
 "postcopy-blocktime": 100
}}

postcopy_vcpu_blocktime contains a list, where the first item corresponds to
the first vCPU in QEMU.

This patch has a drawback: it combines the states of incoming and
outgoing migration. The ongoing migration state will overwrite the incoming
state. It looks better to separate query-migrate for incoming and
outgoing migration, or to add a parameter indicating the type of migration.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
+++ b/qapi/migration.json
@@ -156,6 +156,13 @@
  #  @status is 'failed'. Clients should not attempt to parse the
  #  error strings. (Since 2.7)
  #
+# @postcopy-blocktime: total time when all vCPU were blocked during postcopy
+#   live migration (Since 2.11)

2.12 now.

Should this mention the capability knob needed to enable this stat (or
else get rid of the capability knob and always expose this when possible)?


+#
+# @postcopy-vcpu-blocktime: list of the postcopy blocktime per vCPU (Since 
2.11)

Also 2.12.


+#
+
+#
  # Since: 0.14.0
  ##
  { 'struct': 'MigrationInfo',
@@ -167,7 +174,9 @@
 '*downtime': 'int',
 '*setup-time': 'int',
 '*cpu-throttle-percentage': 'int',
-   '*error-desc': 'str'} }
+   '*error-desc': 'str',
+   '*postcopy-blocktime' : 'int64',
+   '*postcopy-vcpu-blocktime': ['int64']} }
  
  ##

  # @query-migrate:



--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [PATCH v12 1/6] migration: introduce postcopy-blocktime capability

2018-01-05 Thread Alexey Perevalov

On 01/03/2018 12:20 AM, Eric Blake wrote:

On 10/30/2017 08:16 AM, Alexey Perevalov wrote:

Right now it can be used on the destination side to
enable vCPU blocktime calculation for postcopy live migration.
vCPU blocktime is the time from when a vCPU thread was put into
interruptible sleep until the memory page was copied and the thread awakened.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
  migration/migration.c | 9 +
  migration/migration.h | 1 +
  qapi/migration.json   | 6 +-
  3 files changed, 15 insertions(+), 1 deletion(-)


Is there any reason this has to be a new capability rather than
unconditionally enabled?  What are the trade-offs for enabling vs.
disabling the capability that warrant it being a knob?  Can we do a
better job of documenting in which cases the user would want to change
the knob from its default value, if we even need it to be a knob?

Hello Eric,
sorry for the late response, it's a holiday throughout Russia.

The reason we decided to introduce a new capability is the
performance penalty and the memory & cpu usage; in the current version
it's not as high as in the initial one, but it affects the hot path of
post-copy live migration.

Regarding documentation part, I'll answer in
"[PATCH v12 6/6] migration: add postcopy total blocktime into query-migrate"
thread.




+++ b/qapi/migration.json
@@ -352,12 +352,16 @@
  #
  # @x-multifd: Use more than one fd for migration (since 2.11)
  #
+# @postcopy-blocktime: Calculate downtime for postcopy live migration
+# (since 2.11)
+#
  # Since: 1.2
  ##
  { 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
-   'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] }
+   'block', 'return-path', 'pause-before-switchover', 'x-multifd',
+   'postcopy-blocktime' ] }
  
  ##

  # @MigrationCapabilityStatus:



--
Best regards,
Alexey Perevalov



[Qemu-devel] [PATCH v12 6/6] migration: add postcopy total blocktime into query-migrate

2017-10-30 Thread Alexey Perevalov
Postcopy total blocktime is available on the destination side only,
but query-migrate was possible only for the source. This patch
adds the ability to call query-migrate on the destination.
To be able to see postcopy blocktime, the postcopy-blocktime
capability needs to be requested.

The query-migrate command will show the following sample result:
{"return": {
"postcopy-vcpu-blocktime": [115, 100],
"status": "completed",
"postcopy-blocktime": 100
}}

postcopy_vcpu_blocktime contains a list, where the first item corresponds to
the first vCPU in QEMU.

This patch has a drawback: it combines the states of incoming and
outgoing migration. The ongoing migration state will overwrite the incoming
state. It looks better to separate query-migrate for incoming and
outgoing migration, or to add a parameter indicating the type of migration.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 hmp.c| 15 +
 migration/migration.c| 42 
 migration/migration.h|  4 
 migration/postcopy-ram.c | 56 
 migration/trace-events   |  1 +
 qapi/migration.json  | 11 +-
 6 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/hmp.c b/hmp.c
index 41fcce6..4f42eb8 100644
--- a/hmp.c
+++ b/hmp.c
@@ -264,6 +264,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_postcopy_blocktime) {
+monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+   info->postcopy_blocktime);
+}
+
+if (info->has_postcopy_vcpu_blocktime) {
+Visitor *v;
+char *str;
+v = string_output_visitor_new(false, &str);
+visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_complete(v, &str);
+monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
+g_free(str);
+visit_free(v);
+}
 qapi_free_MigrationInfo(info);
 qapi_free_MigrationCapabilityStatusList(caps);
 }
diff --git a/migration/migration.c b/migration/migration.c
index c5244ae..cd09ba4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -589,14 +589,15 @@ static void populate_disk_info(MigrationInfo *info)
 }
 }
 
-MigrationInfo *qmp_query_migrate(Error **errp)
+static void fill_source_migration_info(MigrationInfo *info)
 {
-MigrationInfo *info = g_malloc0(sizeof(*info));
 MigrationState *s = migrate_get_current();
 
 switch (s->state) {
 case MIGRATION_STATUS_NONE:
 /* no migration has happened ever */
+/* do not overwrite destination migration status */
+return;
 break;
 case MIGRATION_STATUS_SETUP:
 info->has_status = true;
@@ -647,8 +648,6 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 break;
 }
 info->status = s->state;
-
-return info;
 }
 
 /**
@@ -712,6 +711,41 @@ static bool migrate_caps_check(bool *cap_list,
 return true;
 }
 
+static void fill_destination_migration_info(MigrationInfo *info)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+
+switch (mis->state) {
+case MIGRATION_STATUS_NONE:
+return;
+break;
+case MIGRATION_STATUS_SETUP:
+case MIGRATION_STATUS_CANCELLING:
+case MIGRATION_STATUS_CANCELLED:
+case MIGRATION_STATUS_ACTIVE:
+case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+case MIGRATION_STATUS_FAILED:
+case MIGRATION_STATUS_COLO:
+info->has_status = true;
+break;
+case MIGRATION_STATUS_COMPLETED:
+info->has_status = true;
+fill_destination_postcopy_migration_info(info);
+break;
+}
+info->status = mis->state;
+}
+
+MigrationInfo *qmp_query_migrate(Error **errp)
+{
+MigrationInfo *info = g_malloc0(sizeof(*info));
+
+fill_destination_migration_info(info);
+fill_source_migration_info(info);
+
+return info;
+}
+
 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
   Error **errp)
 {
diff --git a/migration/migration.h b/migration/migration.h
index fb8d2ef..99f294f 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -71,6 +71,10 @@ struct MigrationIncomingState {
 
 MigrationIncomingState *migration_incoming_get_current(void);
 void migration_incoming_state_destroy(void);
+/*
+ * Functions to work with blocktime context
+ */
+void fill_destination_postcopy_migration_info(MigrationInfo *info);
 
 #define TYPE_MIGRATION "migration"
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 6bf24e9..2823133 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -108,6 +108,55 @@ static struct PostcopyBlocktimeContext 
*blocktime_context_new(void)
 re

[Qemu-devel] [PATCH v12 2/6] migration: add postcopy blocktime ctx into MigrationIncomingState

2017-10-30 Thread Alexey Perevalov
This patch adds a request to kernel space for UFFD_FEATURE_THREAD_ID,
in case this feature is provided by the kernel.

PostcopyBlocktimeContext is encapsulated inside postcopy-ram.c,
since it is a postcopy-only feature.
It also defines the PostcopyBlocktimeContext instance's lifetime.
Information from a PostcopyBlocktimeContext instance will be provided
long after postcopy migration ends; the instance of PostcopyBlocktimeContext
will live till QEMU exit, but the parts of it (vcpu_addr,
page_fault_vcpu_time) used only during calculation will be released
when postcopy ends or fails.

To enable postcopy blocktime calculation on the destination, the proper
capability needs to be requested (a patch for documentation is at the tail
of the patch set).

As an example following command enable that capability, assume QEMU was
started with
-chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock
option to control it

[root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \
{\"execute\": \"migrate-set-capabilities\" , \"arguments\":   {
\"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\":
true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock

Or just with HMP
(qemu) migrate_set_capability postcopy-blocktime on

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/migration.h|  8 +++
 migration/postcopy-ram.c | 59 
 2 files changed, 67 insertions(+)

diff --git a/migration/migration.h b/migration/migration.h
index 5f5e527..fb8d2ef 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -22,6 +22,8 @@
 #include "hw/qdev.h"
 #include "io/channel.h"
 
+struct PostcopyBlocktimeContext;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
 QEMUFile *from_src_file;
@@ -59,6 +61,12 @@ struct MigrationIncomingState {
 /* The coroutine we should enter (back) after failover */
 Coroutine *migration_incoming_co;
 QemuSemaphore colo_incoming_sem;
+
+/*
+ * PostcopyBlocktimeContext to keep information for postcopy
+ * live migration, to calculate vCPU block time
+ * */
+struct PostcopyBlocktimeContext *blocktime_ctx;
 };
 
 MigrationIncomingState *migration_incoming_get_current(void);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index bec6c2c..c18ec5a 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -61,6 +61,52 @@ struct PostcopyDiscardState {
 #include <sys/eventfd.h>
 #include <linux/userfaultfd.h>
 
+typedef struct PostcopyBlocktimeContext {
+/* time when page fault initiated per vCPU */
+int64_t *page_fault_vcpu_time;
+/* page address per vCPU */
+uint64_t *vcpu_addr;
+int64_t total_blocktime;
+/* blocktime per vCPU */
+int64_t *vcpu_blocktime;
+/* point in time when last page fault was initiated */
+int64_t last_begin;
+/* number of vCPU are suspended */
+int smp_cpus_down;
+
+/*
+ * Handler for exit event, necessary for
+ * releasing whole blocktime_ctx
+ */
+Notifier exit_notifier;
+} PostcopyBlocktimeContext;
+
+static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx)
+{
+g_free(ctx->page_fault_vcpu_time);
+g_free(ctx->vcpu_addr);
+g_free(ctx->vcpu_blocktime);
+g_free(ctx);
+}
+
+static void migration_exit_cb(Notifier *n, void *data)
+{
+PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext,
+ exit_notifier);
+destroy_blocktime_context(ctx);
+}
+
+static struct PostcopyBlocktimeContext *blocktime_context_new(void)
+{
+PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
+ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->vcpu_addr = g_new0(uint64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+
+ctx->exit_notifier.notify = migration_exit_cb;
+qemu_add_exit_notifier(&ctx->exit_notifier);
+return ctx;
+}
 
 /**
  * receive_ufd_features: check userfault fd features, to request only supported
@@ -153,6 +199,19 @@ static bool ufd_check_and_apply(int ufd, 
MigrationIncomingState *mis)
 }
 }
 
+#ifdef UFFD_FEATURE_THREAD_ID
+if (migrate_postcopy_blocktime() && mis &&
+UFFD_FEATURE_THREAD_ID & supported_features) {
+/* kernel supports that feature */
+/* don't create blocktime_context if it exists */
+if (!mis->blocktime_ctx) {
+mis->blocktime_ctx = blocktime_context_new();
+}
+
+asked_features |= UFFD_FEATURE_THREAD_ID;
+}
+#endif
+
 /*
  * request features, even if asked_features is 0, due to
  * kernel expects UFFD_API before UFFDIO_REGISTER, per
-- 
2.7.4




[Qemu-devel] [PATCH v12 1/6] migration: introduce postcopy-blocktime capability

2017-10-30 Thread Alexey Perevalov
Right now it can be used on the destination side to
enable vCPU blocktime calculation for postcopy live migration.
vCPU blocktime is the time from when a vCPU thread was put into
interruptible sleep until the memory page was copied and the thread awakened.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/migration.c | 9 +
 migration/migration.h | 1 +
 qapi/migration.json   | 6 +-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index 62761d5..c5244ae 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1492,6 +1492,15 @@ bool migrate_zero_blocks(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
 }
 
+bool migrate_postcopy_blocktime(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
+}
+
 bool migrate_use_compression(void)
 {
 MigrationState *s;
diff --git a/migration/migration.h b/migration/migration.h
index 8ccdd7a..5f5e527 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -202,6 +202,7 @@ int migrate_compress_level(void);
 int migrate_compress_threads(void);
 int migrate_decompress_threads(void);
 bool migrate_use_events(void);
+bool migrate_postcopy_blocktime(void);
 
 /* Sending on the return path - generic and then for each message type */
 void migrate_send_rp_shut(MigrationIncomingState *mis,
diff --git a/qapi/migration.json b/qapi/migration.json
index 6ae866e..c20caf4 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -352,12 +352,16 @@
 #
 # @x-multifd: Use more than one fd for migration (since 2.11)
 #
+# @postcopy-blocktime: Calculate downtime for postcopy live migration
+# (since 2.11)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
-   'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] }
+   'block', 'return-path', 'pause-before-switchover', 'x-multifd',
+   'postcopy-blocktime' ] }
 
 ##
 # @MigrationCapabilityStatus:
-- 
2.7.4




[Qemu-devel] [PATCH v12 3/6] migration: calculate vCPU blocktime on dst side

2017-10-30 Thread Alexey Perevalov
This patch provides blocktime calculation per vCPU,
as a summary and as an overlapped value for all vCPUs.

This approach was suggested by Peter Xu as an improvement over the previous
approach, where QEMU kept a tree with the faulted page address and a cpus
bitmask in it. Now QEMU keeps an array with the faulted page address as value
and the vCPU as index. That helps to find the proper vCPU at UFFD_COPY time.
It also keeps a list of blocktimes per vCPU (which can be traced with
page_fault_addr).

Blocktime will not be calculated if the postcopy_blocktime field of
MigrationIncomingState wasn't initialized.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 143 ++-
 migration/trace-events   |   5 +-
 2 files changed, 146 insertions(+), 2 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index c18ec5a..6bf24e9 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -553,6 +553,142 @@ static int ram_block_enable_notify(const char 
*block_name, void *host_addr,
 return 0;
 }
 
+static int get_mem_fault_cpu_index(uint32_t pid)
+{
+CPUState *cpu_iter;
+
+CPU_FOREACH(cpu_iter) {
+if (cpu_iter->thread_id == pid) {
+trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid);
+return cpu_iter->cpu_index;
+}
+}
+trace_get_mem_fault_cpu_index(-1, pid);
+return -1;
+}
+
+/*
+ * This function is being called when pagefault occurs. It
+ * tracks down vCPU blocking time.
+ *
+ * @addr: faulted host virtual address
+ * @ptid: faulted process thread id
+ * @rb: ramblock appropriate to addr
+ */
+static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid,
+  RAMBlock *rb)
+{
+int cpu, already_received;
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int64_t now_ms;
+
+if (!dc || ptid == 0) {
+return;
+}
+cpu = get_mem_fault_cpu_index(ptid);
+if (cpu < 0) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+if (dc->vcpu_addr[cpu] == 0) {
+atomic_inc(&dc->smp_cpus_down);
+}
+
+atomic_xchg__nocheck(&dc->last_begin, now_ms);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+
+/* check it here, not at the beginning of the function,
+ * due to, check could occur earlier than bitmap_set in
+ * qemu_ufd_copy_ioctl */
+already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
+if (already_received) {
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], 0);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], 0);
+atomic_dec(&dc->smp_cpus_down);
+}
+trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
+cpu, already_received);
+}
+
+/*
+ *  This function just provides the calculated blocktime per cpu and traces it.
+ *  Total blocktime is calculated in mark_postcopy_blocktime_end.
+ *
+ *
+ * Assume we have 3 CPU
+ *
+ *      S1        E1           S1               E1
+ * -----***********------------xxx***************------------------------> CPU1
+ *
+ *             S2                E2
+ * ------------****************xxx---------------------------------------> CPU2
+ *
+ *                         S3            E3
+ * ------------------------****xxx---------------------------------------> CPU3
+ *
+ * We have the sequence S1,S2,E1,S3,S1,E2,E3,E1
+ * S2,E1 - doesn't match the condition, because the sequence S1,S2,E1 doesn't
+ * include CPU3
+ * S3,S1,E2 - this sequence includes all CPUs, so the overlap will be S1,E2 -
+ * it's a part of the total blocktime.
+ * S1 - here is last_begin
+ * Legend of the picture is following:
+ *  * - means blocktime per vCPU
+ *  x - means overlapped blocktime (total blocktime)
+ *
+ * @addr: host virtual address
+ */
+static void mark_postcopy_blocktime_end(uint64_t addr)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int i, affected_cpu = 0;
+int64_t now_ms;
+bool vcpu_total_blocktime = false;
+int64_t read_vcpu_time;
+
+if (!dc) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+/* lookup cpu, to clear it,
+ * that algorithm looks straightforward, but it's not
+ * optimal, more optimal algorithm is keeping tree or hash
+ * where key is address value is a list of  */
+for (i = 0; i < smp_cpus; i++) {
+uint64_t vcpu_blocktime = 0;
+
+read_vcpu_time = atomic_fetch_add(&dc->page_fault_vcpu_time[i], 0);
+if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr ||
+read_vcpu_time == 0) {
+continue;
+}
+atomic_xchg__nocheck
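
A minimal standalone sketch of the bookkeeping described by the diagram above
(hypothetical names, single-threaded, no atomics - not the QEMU code itself):

    #include <stdint.h>

    #define NCPUS 3

    static uint32_t fault_time[NCPUS];      /* 0 means "not blocked" */
    static uint32_t vcpu_blocktime[NCPUS];  /* per-vCPU blocktime (the '*' spans) */
    static uint32_t last_begin;             /* time of the most recent S point */
    static uint32_t total_blocktime;        /* overlapped time (the 'x' spans) */
    static int cpus_down;

    static void fault_begin(int cpu, uint32_t now)   /* an S point */
    {
        if (fault_time[cpu] == 0) {
            cpus_down++;
        }
        last_begin = now;
        fault_time[cpu] = now;
    }

    static void fault_end(int cpu, uint32_t now)     /* an E point */
    {
        vcpu_blocktime[cpu] += now - fault_time[cpu];
        if (cpus_down == NCPUS) {
            /* every vCPU was blocked: the overlap ran from the latest S */
            total_blocktime += now - last_begin;
        }
        fault_time[cpu] = 0;
        cpus_down--;
    }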

[Qemu-devel] [PATCH v12 0/6] calculate blocktime for postcopy live migration

2017-10-30 Thread Alexey Perevalov
This is the 12th version.

The rationale for that idea is the following:
a vCPU can be suspended during postcopy live migration until the faulted
page is copied into the kernel. Downtime on the source side is a value -
the time interval from when the source turns the vCPU off till the
destination starts running the vCPU. That value is proper for precopy
migration, where it really shows the amount of time the vCPU is down. But
not for postcopy migration, because several vCPU threads can suspend after
the vCPU was started. That is important to estimate packet drop for SDN
software.

(V11 -> V12)
- don't read vcpu_times twice in mark_postcopy_blocktime_end (comment
from David)
- migration-test doesn't touch got_stop due to multiple tests, and some
code changes due to latest migration-test refactoring.

(V10 -> V11)
- rebase
- update documentation (comment from David)
- postcopy_notifier was removed from PostcopyBlocktimeContext (comment from
David)
- fix "since 2.10" for postcopy-vcpu-blocktime (comment from Eric)
- fix order in mark_postcopy_blocktime_begin/end (comment from David),
but I think it still has a slim race condition
- remove error_report from fill_destination_postcopy_migration_info (comment
from David)

(V9 -> V10)
- rebase
- patch "update kernel header for UFFD_FEATURE_*" has changed,
and was generated by  scripts/update-linux-headers.sh as David suggested. 

(V8 -> V9)
- rebase
- traces

(V7 -> V8)
- just one comma in
"migration: fix hardcoded function name in error report"
It was really missing, but is fixed in a further patch.

(V6 -> V7)
- copied bitmap was placed into RAMBlock as another migration
related bitmaps.
- Ordering of mark_postcopy_blocktime_end call and ordering
of checking copied bitmap were changed.
- linewrap style defects
- new patch "postcopy_place_page factoring out"
- postcopy_ram_supported_by_host accepts
MigrationIncomingState in qmp_migrate_set_capabilities
- minor fixes of documentation. 
and the huge description of get_postcopy_total_blocktime was
moved. David's comment.

(V5 -> V6)
- blocktime was added into hmp command. Comment from David.
- bitmap for copied pages was added as well as check in *_begin/_end
functions. Patch uses just introduced RAMBLOCK_FOREACH. Comment from David.
- description of receive_ufd_features/request_ufd_features. Comment from 
David.
- commit message headers/@since references were modified. Comment from Eric.
- also typos in documentation. Comment from Eric.
- style and description of field in MigrationInfo. Comment from Eric.
- ufd_check_and_apply (former ufd_version_check) is called twice,
so my previous patch contained a double allocation of the blocktime context
and, as a result, a memory leak. In this patch series it is fixed.

(V4 -> V5)
- the fill_destination_postcopy_migration_info empty stub was missing for
the non-Linux build

(V3 -> V4)
- get rid of Downtime as a name for vCPU waiting time during postcopy 
migration
- PostcopyBlocktimeContext renamed (it was just BlocktimeContext)
- atomic operations are used for dealing with fields of 
PostcopyBlocktimeContext
affected in both threads.
- hardcoded function names in error_report were replaced with %s and __line__
- this patch set includes the postcopy-downtime capability, but it is used
on the destination; coupled with the impossibility of returning the
calculated downtime back to the source to show it in query-migrate, it
looks like a big trade-off
- UFFD_API has to be sent whether or not we need to ask the kernel for a
feature, because the kernel expects it in any case (see patch comment)
- postcopy_downtime included into query-migrate output
- also this patch set includes trivial fix
migration: fix hardcoded function name in error report
maybe that is a candidate for qemu-trivial mailing list, but I already
sent "migration: Fixed code style" and it was unclaimed.

(V2 -> V3)
- Downtime calculation approach was changed, thanks to Peter Xu
- Due to the previous point there is no more need to keep a GTree or a
bitmap of cpus, so the glib changes aren't included in this patch set; they
could be resent in another patch set if there is a good reason for it.
- No procfs traces in this patch set; if somebody wants them, they can be
taken from the patchwork site to track down page fault initiators.
- UFFD_FEATURE_THREAD_ID is requested only when the kernel supports it
- It doesn't send back the downtime, just traces it

Patch set is based on commit 3be480ebb8fdcc99f0a4fcbbf36ec5642a16a10b
and Juan Quintela's series "tests: Add migration compress threads tests"

Alexey Perevalov (6):
  migration: introduce postcopy-blocktime capability
  migration: add postcopy blocktime ctx into MigrationIncomingState
  migration: calculate vCPU blocktime on dst side
  migration: postcopy_blocktime documentation
  migration: add blocktime calculation int

[Qemu-devel] [PATCH v12 4/6] migration: postcopy_blocktime documentation

2017-10-30 Thread Alexey Perevalov
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 docs/devel/migration.txt | 13 +
 1 file changed, 13 insertions(+)

diff --git a/docs/devel/migration.txt b/docs/devel/migration.txt
index 4030703..cebfe7a 100644
--- a/docs/devel/migration.txt
+++ b/docs/devel/migration.txt
@@ -402,6 +402,19 @@ will now cause the transition from precopy to postcopy.
 It can be issued immediately after migration is started or any
 time later on.  Issuing it after the end of a migration is harmless.
 
+Blocktime is a postcopy live migration metric, intended to show how
+long the vCPU was in the state of interruptible sleep due to a pagefault.
+That metric is calculated both for all vCPUs as an overlapped value, and
+separately for each vCPU. These values are calculated on the destination side.
+To enable postcopy blocktime calculation, enter the following command on the
+destination monitor:
+
+migrate_set_capability postcopy-blocktime on
+
+Postcopy blocktime can be retrieved by the query-migrate QMP command. The
+postcopy-blocktime value will show the overlapped blocking time for all
+vCPUs, and postcopy-vcpu-blocktime will show the list of blocking times per vCPU.
+
 Note: During the postcopy phase, the bandwidth limits set using
 migrate_set_speed is ignored (to avoid delaying requested pages that
 the destination is waiting for).
-- 
2.7.4
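
For reference, once the capability above is enabled on the destination, the
metric can be read back over QMP roughly like this (a hypothetical session;
the numeric values are taken from the sample result elsewhere in this
series, not from a real run):

-> { "execute": "query-migrate" }
<- { "return": { "status": "completed",
                 "postcopy-blocktime": 100,
                 "postcopy-vcpu-blocktime": [115, 100] } }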




[Qemu-devel] [PATCH v12 5/6] migration: add blocktime calculation into migration-test

2017-10-30 Thread Alexey Perevalov
This patch just requests blocktime calculation,
and checks it when the UFFD_FEATURE_THREAD_ID feature is set
on the host.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 tests/migration-test.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/tests/migration-test.c b/tests/migration-test.c
index db30b38..3b4d88a 100644
--- a/tests/migration-test.c
+++ b/tests/migration-test.c
@@ -25,6 +25,7 @@
 const unsigned start_address = 1024 * 1024;
 const unsigned end_address = 100 * 1024 * 1024;
 bool got_stop;
+static bool uffd_feature_thread_id;
 
 #if defined(__linux__)
 #include 
@@ -54,6 +55,7 @@ static bool ufd_version_check(void)
 g_test_message("Skipping test: UFFDIO_API failed");
 return false;
 }
+uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID;
 
 ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
  (__u64)1 << _UFFDIO_UNREGISTER;
@@ -266,6 +268,16 @@ static uint64_t get_migration_pass(QTestState *who)
 return result;
 }
 
+static void read_blocktime(QTestState *who)
+{
+QDict *rsp, *rsp_return;
+
+rsp = wait_command(who, "{ 'execute': 'query-migrate' }");
+rsp_return = qdict_get_qdict(rsp, "return");
+g_assert(qdict_haskey(rsp_return, "postcopy-blocktime"));
+QDECREF(rsp);
+}
+
 static void wait_for_migration_complete(QTestState *who)
 {
 QDict *rsp, *rsp_return;
@@ -540,6 +552,7 @@ static void test_postcopy(void)
 
 migrate_set_capability(from, "postcopy-ram", "true");
 migrate_set_capability(to, "postcopy-ram", "true");
+migrate_set_capability(to, "postcopy-blocktime", "true");
 
 /* We want to pick a speed slow enough that the test completes
  * quickly, but that it doesn't complete precopy even on a slow
@@ -568,6 +581,9 @@ static void test_postcopy(void)
 wait_for_serial("dest_serial");
 wait_for_migration_complete(from);
 
+if (uffd_feature_thread_id) {
+read_blocktime(to);
+}
 g_free(uri);
 
 test_migrate_end(from, to);
-- 
2.7.4
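
The hunk context above elides the userfaultfd setup. A standalone probe
equivalent to what ufd_version_check() does - open a userfaultfd, issue
UFFDIO_API, and test the feature bit - might look like this minimal sketch
(assumes Linux with <linux/userfaultfd.h>; probe_uffd_thread_id is an
illustrative name, not a function from the patch):

#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

static bool probe_uffd_thread_id(void)
{
    struct uffdio_api api_struct;
    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    bool have_thread_id = false;

    if (ufd == -1) {
        return false;            /* no userfaultfd support at all */
    }
    memset(&api_struct, 0, sizeof(api_struct));
    api_struct.api = UFFD_API;   /* features left at 0: just query */
    if (!ioctl(ufd, UFFDIO_API, &api_struct)) {
#ifdef UFFD_FEATURE_THREAD_ID
        have_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID;
#endif
    }
    close(ufd);
    return have_thread_id;
}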




Re: [Qemu-devel] [PATCH v11 3/6] migration: calculate vCPU blocktime on dst side

2017-10-19 Thread Alexey Perevalov

On 10/18/2017 09:59 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

This patch provides blocktime calculation per vCPU,
as a summary and as an overlapped value for all vCPUs.

This approach was suggested by Peter Xu, as an improvement over the
previous approach where QEMU kept a tree with the faulted page address and
a cpus bitmask in it. Now QEMU keeps an array with the faulted page address
as value and the vCPU as index. It helps to find the proper vCPU at
UFFD_COPY time. It also keeps a list of blocktimes per vCPU (which can be
traced with page_fault_addr).

Blocktime will not be calculated if the postcopy_blocktime field of
MigrationIncomingState wasn't initialized.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
  migration/postcopy-ram.c | 142 ++-
  migration/trace-events   |   5 +-
  2 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index c18ec5a..2e10870 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -553,6 +553,141 @@ static int ram_block_enable_notify(const char 
*block_name, void *host_addr,
  return 0;
  }
  
+static int get_mem_fault_cpu_index(uint32_t pid)

+{
+CPUState *cpu_iter;
+
+CPU_FOREACH(cpu_iter) {
+if (cpu_iter->thread_id == pid) {
+trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid);
+return cpu_iter->cpu_index;
+}
+}
+trace_get_mem_fault_cpu_index(-1, pid);
+return -1;
+}
+
+/*
+ * This function is being called when pagefault occurs. It
+ * tracks down vCPU blocking time.
+ *
+ * @addr: faulted host virtual address
+ * @ptid: faulted process thread id
+ * @rb: ramblock appropriate to addr
+ */
+static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid,
+  RAMBlock *rb)
+{
+int cpu, already_received;
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int64_t now_ms;
+
+if (!dc || ptid == 0) {
+return;
+}
+cpu = get_mem_fault_cpu_index(ptid);
+if (cpu < 0) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+if (dc->vcpu_addr[cpu] == 0) {
+atomic_inc(&dc->smp_cpus_down);
+}
+
+atomic_xchg__nocheck(&dc->last_begin, now_ms);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+
+/* check it here, not at the beginning of the function,
+ * because the check could occur earlier than bitmap_set in
+ * qemu_ufd_copy_ioctl */
+already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
+if (already_received) {
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], 0);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], 0);
+atomic_sub(&dc->smp_cpus_down, 1);

Minor; but you could use atomic_dec to go with the atomic_inc


+}
+trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
+cpu, already_received);
+}
+
+/*
+ *  This function just provides the calculated blocktime per cpu and traces it.
+ *  Total blocktime is calculated in mark_postcopy_blocktime_end.
+ *
+ *
+ * Assume we have 3 CPUs
+ *
+ *      S1        E1           S1               E1
+ * -----***********------------xxx***************------------------------> CPU1
+ *
+ *             S2                E2
+ * ------------****************xxx---------------------------------------> CPU2
+ *
+ *                         S3            E3
+ * ------------------------****xxx********-------------------------------> CPU3
+ *
+ * We have the sequence S1,S2,E1,S3,S1,E2,E3,E1
+ * S2,E1 - doesn't match the condition, because the sequence S1,S2,E1 doesn't
+ *include CPU3
+ * S3,S1,E2 - the sequence includes all CPUs, in this case the overlap will be
+ *S1,E2 - it's a part of the total blocktime.
+ * S1 - here is last_begin
+ * The legend of the picture is the following:
+ *  * - means blocktime per vCPU
+ *  x - means overlapped blocktime (total blocktime)
+ *
+ * @addr: host virtual address
+ */
+static void mark_postcopy_blocktime_end(uint64_t addr)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int i, affected_cpu = 0;
+int64_t now_ms;
+bool vcpu_total_blocktime = false;
+
+if (!dc) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+/* lookup the cpu, to clear it;
+ * that algorithm looks straightforward, but it's not
+ * optimal: a more optimal algorithm would keep a tree or hash
+ * where the key is an address and the value is a list of  */
+for (i = 0; i < smp_cpus; i++) {
+uint64_t vcpu_blocktime = 0;
+
+if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr 

Re: [Qemu-devel] [RFC v2 00/32] postcopy+vhost-user/shared ram

2017-10-16 Thread Alexey Perevalov

Hello Maxime

On 09/01/2017 04:42 PM, Maxime Coquelin wrote:

Hello Alexey,

On 09/01/2017 03:34 PM, Alexey Perevalov wrote:

Hello David,

You wrote in previous version:


We've had a postcopy migrate work now, with a few hacks we're still
cleaning up, both on vhost-user-bridge and dpdk; so I'll get this
updated and reposted.


I want to know more about DPDK work, do you know, is somebody 
assigned to that task?


I did the DPDK (rough) prototype, you may find it here:
https://gitlab.com/mcoquelin/dpdk-next-virtio/commits/postcopy_proto_v1


I found it is for the previous version of the patch set. Do you have any updates?


Cheers,
Maxime





--
Best regards,
Alexey Perevalov



[Qemu-devel] [PATCH v11 6/6] migration: add postcopy total blocktime into query-migrate

2017-10-05 Thread Alexey Perevalov
Postcopy total blocktime is available on the destination side only.
But query-migrate was possible only for the source. This patch
adds the ability to call query-migrate on the destination.
To be able to see postcopy blocktime, the postcopy-blocktime
capability needs to be requested.

The query-migrate command will show the following sample result:
{"return": {
"postcopy-vcpu-blocktime": [115, 100],
"status": "completed",
"postcopy-blocktime": 100
}}

postcopy-vcpu-blocktime contains a list, where the first item corresponds
to the first vCPU in QEMU.

This patch has a drawback: it combines the states of incoming and
outgoing migration, and the outgoing migration state will overwrite the
incoming state. It looks better to separate query-migrate for incoming and
outgoing migration, or to add a parameter indicating the type of migration.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 hmp.c| 15 +
 migration/migration.c| 42 
 migration/migration.h|  4 
 migration/postcopy-ram.c | 56 
 migration/trace-events   |  1 +
 qapi/migration.json  | 11 +-
 6 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/hmp.c b/hmp.c
index ace729d..1939c02 100644
--- a/hmp.c
+++ b/hmp.c
@@ -264,6 +264,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_postcopy_blocktime) {
+monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+   info->postcopy_blocktime);
+}
+
+if (info->has_postcopy_vcpu_blocktime) {
+Visitor *v;
+char *str;
+v = string_output_visitor_new(false, &str);
+visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_complete(v, );
+monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
+g_free(str);
+visit_free(v);
+}
 qapi_free_MigrationInfo(info);
 qapi_free_MigrationCapabilityStatusList(caps);
 }
diff --git a/migration/migration.c b/migration/migration.c
index 713f070..91fe885 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -584,14 +584,15 @@ static void populate_disk_info(MigrationInfo *info)
 }
 }
 
-MigrationInfo *qmp_query_migrate(Error **errp)
+static void fill_source_migration_info(MigrationInfo *info)
 {
-MigrationInfo *info = g_malloc0(sizeof(*info));
 MigrationState *s = migrate_get_current();
 
 switch (s->state) {
 case MIGRATION_STATUS_NONE:
 /* no migration has happened ever */
+/* do not overwrite destination migration status */
+return;
 break;
 case MIGRATION_STATUS_SETUP:
 info->has_status = true;
@@ -640,8 +641,6 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 break;
 }
 info->status = s->state;
-
-return info;
 }
 
 /**
@@ -705,6 +704,41 @@ static bool migrate_caps_check(bool *cap_list,
 return true;
 }
 
+static void fill_destination_migration_info(MigrationInfo *info)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+
+switch (mis->state) {
+case MIGRATION_STATUS_NONE:
+return;
+break;
+case MIGRATION_STATUS_SETUP:
+case MIGRATION_STATUS_CANCELLING:
+case MIGRATION_STATUS_CANCELLED:
+case MIGRATION_STATUS_ACTIVE:
+case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+case MIGRATION_STATUS_FAILED:
+case MIGRATION_STATUS_COLO:
+info->has_status = true;
+break;
+case MIGRATION_STATUS_COMPLETED:
+info->has_status = true;
+fill_destination_postcopy_migration_info(info);
+break;
+}
+info->status = mis->state;
+}
+
+MigrationInfo *qmp_query_migrate(Error **errp)
+{
+MigrationInfo *info = g_malloc0(sizeof(*info));
+
+fill_destination_migration_info(info);
+fill_source_migration_info(info);
+
+return info;
+}
+
 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
   Error **errp)
 {
diff --git a/migration/migration.h b/migration/migration.h
index 2bae992..cb68768 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -71,6 +71,10 @@ struct MigrationIncomingState {
 
 MigrationIncomingState *migration_incoming_get_current(void);
 void migration_incoming_state_destroy(void);
+/*
+ * Functions to work with blocktime context
+ */
+void fill_destination_postcopy_migration_info(MigrationInfo *info);
 
 #define TYPE_MIGRATION "migration"
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 2e10870..a203bae 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -108,6 +108,55 @@ static struct PostcopyBlocktimeContext 
*blocktime_context_new(void)
 re
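
The diff is truncated at this point in the archive. Judging from the
prototype added to migration.h above and the sample QMP output in the
commit message, fill_destination_postcopy_migration_info() presumably
copies the totals out of the blocktime context into MigrationInfo, roughly
as in this sketch (a reconstruction for illustration, not the verbatim
patch):

void fill_destination_postcopy_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
    Int64List *list = NULL;
    int i;

    if (!bc) {
        return;                 /* capability was not enabled */
    }
    /* build the per-vCPU list backwards so vCPU 0 ends up first */
    for (i = smp_cpus - 1; i >= 0; i--) {
        Int64List *entry = g_new0(Int64List, 1);
        entry->value = bc->vcpu_blocktime[i];
        entry->next = list;
        list = entry;
    }
    info->has_postcopy_blocktime = true;
    info->postcopy_blocktime = bc->total_blocktime;
    info->has_postcopy_vcpu_blocktime = true;
    info->postcopy_vcpu_blocktime = list;
}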

[Qemu-devel] [PATCH v11 5/6] migration: add blocktime calculation into postcopy-test

2017-10-05 Thread Alexey Perevalov
This patch just requests blocktime calculation,
and checks it when the UFFD_FEATURE_THREAD_ID feature is set
on the host.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 tests/postcopy-test.c | 63 +++
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c
index 8142f2a..4231cce 100644
--- a/tests/postcopy-test.c
+++ b/tests/postcopy-test.c
@@ -24,7 +24,8 @@
 
 const unsigned start_address = 1024 * 1024;
 const unsigned end_address = 100 * 1024 * 1024;
-bool got_stop;
+static bool got_stop;
+static bool uffd_feature_thread_id;
 
 #if defined(__linux__)
 #include 
@@ -54,6 +55,7 @@ static bool ufd_version_check(void)
 g_test_message("Skipping test: UFFDIO_API failed");
 return false;
 }
+uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID;
 
 ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
  (__u64)1 << _UFFDIO_UNREGISTER;
@@ -265,22 +267,48 @@ static uint64_t get_migration_pass(void)
 return result;
 }
 
-static void wait_for_migration_complete(void)
+static bool get_src_status(void)
 {
 QDict *rsp, *rsp_return;
+const char *status;
+bool result;
+
+rsp = return_or_event(qmp("{ 'execute': 'query-migrate' }"));
+rsp_return = qdict_get_qdict(rsp, "return");
+status = qdict_get_str(rsp_return, "status");
+g_assert_cmpstr(status, !=,  "failed");
+result = strcmp(status, "completed") == 0;
+QDECREF(rsp);
+return result;
+}
+
+static void read_blocktime(void)
+{
+QDict *rsp, *rsp_return;
+
+rsp = return_or_event(qmp("{ 'execute': 'query-migrate' }"));
+rsp_return = qdict_get_qdict(rsp, "return");
+g_assert(qdict_haskey(rsp_return, "postcopy-blocktime"));
+QDECREF(rsp);
+}
+
+static void wait_for_migration_complete(QTestState *from, QTestState *to)
+{
 bool completed;
 
 do {
-const char *status;
-
-rsp = return_or_event(qmp("{ 'execute': 'query-migrate' }"));
-rsp_return = qdict_get_qdict(rsp, "return");
-status = qdict_get_str(rsp_return, "status");
-completed = strcmp(status, "completed") == 0;
-g_assert_cmpstr(status, !=,  "failed");
-QDECREF(rsp);
+
+/* test src state */
+global_qtest = from;
+completed = get_src_status();
+
 usleep(1000 * 100);
 } while (!completed);
+
+if (uffd_feature_thread_id) {
+global_qtest = to;
+read_blocktime();
+}
 }
 
 static void wait_for_migration_pass(void)
@@ -364,8 +392,6 @@ static void test_migrate(void)
 char *bootpath = g_strdup_printf("%s/bootsect", tmpfs);
 const char *arch = qtest_get_arch();
 
-got_stop = false;
-
 if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
 init_bootfile_x86(bootpath);
 cmd_src = g_strdup_printf("-machine accel=kvm:tcg -m 150M"
@@ -425,6 +451,15 @@ static void test_migrate(void)
 g_assert(qdict_haskey(rsp, "return"));
 QDECREF(rsp);
 
+global_qtest = to;
+rsp = qmp("{ 'execute': 'migrate-set-capabilities',"
+  "'arguments': { "
+  "'capabilities': [ {"
+  "'capability': 'postcopy-blocktime',"
+  "'state': true } ] } }");
+g_assert(qdict_haskey(rsp, "return"));
+QDECREF(rsp);
+
 /* We want to pick a speed slow enough that the test completes
  * quickly, but that it doesn't complete precopy even on a slow
  * machine, so also set the downtime.
@@ -441,7 +476,6 @@ static void test_migrate(void)
 g_assert(qdict_haskey(rsp, "return"));
 QDECREF(rsp);
 
-
 /* Wait for the first serial output from the source */
 wait_for_serial("src_serial");
 
@@ -467,8 +501,7 @@ static void test_migrate(void)
 qmp_eventwait("RESUME");
 
 wait_for_serial("dest_serial");
-global_qtest = from;
-wait_for_migration_complete();
+wait_for_migration_complete(from, to);
 
 qtest_quit(from);
 
-- 
2.7.4




[Qemu-devel] [PATCH v11 3/6] migration: calculate vCPU blocktime on dst side

2017-10-05 Thread Alexey Perevalov
This patch provides blocktime calculation per vCPU,
as a summary and as an overlapped value for all vCPUs.

This approach was suggested by Peter Xu, as an improvement over the
previous approach where QEMU kept a tree with the faulted page address and
a cpus bitmask in it. Now QEMU keeps an array with the faulted page address
as value and the vCPU as index. It helps to find the proper vCPU at
UFFD_COPY time. It also keeps a list of blocktimes per vCPU (which can be
traced with page_fault_addr).

Blocktime will not be calculated if the postcopy_blocktime field of
MigrationIncomingState wasn't initialized.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 142 ++-
 migration/trace-events   |   5 +-
 2 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index c18ec5a..2e10870 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -553,6 +553,141 @@ static int ram_block_enable_notify(const char 
*block_name, void *host_addr,
 return 0;
 }
 
+static int get_mem_fault_cpu_index(uint32_t pid)
+{
+CPUState *cpu_iter;
+
+CPU_FOREACH(cpu_iter) {
+if (cpu_iter->thread_id == pid) {
+trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid);
+return cpu_iter->cpu_index;
+}
+}
+trace_get_mem_fault_cpu_index(-1, pid);
+return -1;
+}
+
+/*
+ * This function is being called when pagefault occurs. It
+ * tracks down vCPU blocking time.
+ *
+ * @addr: faulted host virtual address
+ * @ptid: faulted process thread id
+ * @rb: ramblock appropriate to addr
+ */
+static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid,
+  RAMBlock *rb)
+{
+int cpu, already_received;
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int64_t now_ms;
+
+if (!dc || ptid == 0) {
+return;
+}
+cpu = get_mem_fault_cpu_index(ptid);
+if (cpu < 0) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+if (dc->vcpu_addr[cpu] == 0) {
+atomic_inc(&dc->smp_cpus_down);
+}
+
+atomic_xchg__nocheck(&dc->last_begin, now_ms);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+
+/* check it here, not at the beginning of the function,
+ * because the check could occur earlier than bitmap_set in
+ * qemu_ufd_copy_ioctl */
+already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
+if (already_received) {
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], 0);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], 0);
+atomic_sub(&dc->smp_cpus_down, 1);
+}
+trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
+cpu, already_received);
+}
+
+/*
+ *  This function just provides the calculated blocktime per cpu and traces it.
+ *  Total blocktime is calculated in mark_postcopy_blocktime_end.
+ *
+ *
+ * Assume we have 3 CPUs
+ *
+ *      S1        E1           S1               E1
+ * -----***********------------xxx***************------------------------> CPU1
+ *
+ *             S2                E2
+ * ------------****************xxx---------------------------------------> CPU2
+ *
+ *                         S3            E3
+ * ------------------------****xxx********-------------------------------> CPU3
+ *
+ * We have the sequence S1,S2,E1,S3,S1,E2,E3,E1
+ * S2,E1 - doesn't match the condition, because the sequence S1,S2,E1 doesn't
+ *include CPU3
+ * S3,S1,E2 - the sequence includes all CPUs, in this case the overlap will be
+ *S1,E2 - it's a part of the total blocktime.
+ * S1 - here is last_begin
+ * The legend of the picture is the following:
+ *  * - means blocktime per vCPU
+ *  x - means overlapped blocktime (total blocktime)
+ *
+ * @addr: host virtual address
+ */
+static void mark_postcopy_blocktime_end(uint64_t addr)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int i, affected_cpu = 0;
+int64_t now_ms;
+bool vcpu_total_blocktime = false;
+
+if (!dc) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+/* lookup the cpu, to clear it;
+ * that algorithm looks straightforward, but it's not
+ * optimal: a more optimal algorithm would keep a tree or hash
+ * where the key is an address and the value is a list of  */
+for (i = 0; i < smp_cpus; i++) {
+uint64_t vcpu_blocktime = 0;
+
+if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr ||
+atomic_fetch_add(&dc->page_fault_vcpu_time[i], 0) == 0) {
+continue;
+}
+atomic_xchg__nocheck(&dc->vcpu_addr[i], 0);
+vcpu_blocktime = now_ms -
+

[Qemu-devel] [PATCH v11 2/6] migration: add postcopy blocktime ctx into MigrationIncomingState

2017-10-05 Thread Alexey Perevalov
This patch adds a request to kernel space for UFFD_FEATURE_THREAD_ID,
in case this feature is provided by the kernel.

PostcopyBlocktimeContext is encapsulated inside postcopy-ram.c,
since it is a postcopy-only feature.
It also defines the lifetime of a PostcopyBlocktimeContext instance.
Information from a PostcopyBlocktimeContext instance will be provided
long after the postcopy migration ends; the instance will live until
QEMU exits, but the parts of it (vcpu_addr, page_fault_vcpu_time) used
only during calculation will be released when postcopy ends or fails.

To enable postcopy blocktime calculation on the destination, the proper
capability needs to be requested (the documentation patch is at the tail
of the patch set).

As an example, the following command enables that capability, assuming QEMU
was started with the
-chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock
option to control it:

[root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \
{\"execute\": \"migrate-set-capabilities\" , \"arguments\":   {
\"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\":
true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock

Or just with HMP
(qemu) migrate_set_capability postcopy-blocktime on

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/migration.h|  8 +++
 migration/postcopy-ram.c | 59 
 2 files changed, 67 insertions(+)

diff --git a/migration/migration.h b/migration/migration.h
index c12ceba..2bae992 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -22,6 +22,8 @@
 #include "hw/qdev.h"
 #include "io/channel.h"
 
+struct PostcopyBlocktimeContext;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
 QEMUFile *from_src_file;
@@ -59,6 +61,12 @@ struct MigrationIncomingState {
 /* The coroutine we should enter (back) after failover */
 Coroutine *migration_incoming_co;
 QemuSemaphore colo_incoming_sem;
+
+/*
+ * PostcopyBlocktimeContext to keep information for postcopy
+ * live migration, to calculate vCPU block time
+ * */
+struct PostcopyBlocktimeContext *blocktime_ctx;
 };
 
 MigrationIncomingState *migration_incoming_get_current(void);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index bec6c2c..c18ec5a 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -61,6 +61,52 @@ struct PostcopyDiscardState {
 #include 
 #include 
 
+typedef struct PostcopyBlocktimeContext {
+/* time when page fault initiated per vCPU */
+int64_t *page_fault_vcpu_time;
+/* page address per vCPU */
+uint64_t *vcpu_addr;
+int64_t total_blocktime;
+/* blocktime per vCPU */
+int64_t *vcpu_blocktime;
+/* point in time when last page fault was initiated */
+int64_t last_begin;
+/* number of vCPUs suspended */
+int smp_cpus_down;
+
+/*
+ * Handler for exit event, necessary for
+ * releasing whole blocktime_ctx
+ */
+Notifier exit_notifier;
+} PostcopyBlocktimeContext;
+
+static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx)
+{
+g_free(ctx->page_fault_vcpu_time);
+g_free(ctx->vcpu_addr);
+g_free(ctx->vcpu_blocktime);
+g_free(ctx);
+}
+
+static void migration_exit_cb(Notifier *n, void *data)
+{
+PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext,
+ exit_notifier);
+destroy_blocktime_context(ctx);
+}
+
+static struct PostcopyBlocktimeContext *blocktime_context_new(void)
+{
+PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
+ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->vcpu_addr = g_new0(uint64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+
+ctx->exit_notifier.notify = migration_exit_cb;
+qemu_add_exit_notifier(&ctx->exit_notifier);
+return ctx;
+}
 
 /**
  * receive_ufd_features: check userfault fd features, to request only supported
@@ -153,6 +199,19 @@ static bool ufd_check_and_apply(int ufd, 
MigrationIncomingState *mis)
 }
 }
 
+#ifdef UFFD_FEATURE_THREAD_ID
+if (migrate_postcopy_blocktime() && mis &&
+UFFD_FEATURE_THREAD_ID & supported_features) {
+/* kernel supports that feature */
+/* don't create blocktime_context if it exists */
+if (!mis->blocktime_ctx) {
+mis->blocktime_ctx = blocktime_context_new();
+}
+
+asked_features |= UFFD_FEATURE_THREAD_ID;
+}
+#endif
+
 /*
  * request features, even if asked_features is 0, due to
  * kernel expects UFFD_API before UFFDIO_REGISTER, per
-- 
2.7.4




[Qemu-devel] [PATCH v11 0/6] calculate blocktime for postcopy live migration

2017-10-05 Thread Alexey Perevalov
This is the 11th version.

The rationale for the idea is the following:
a vCPU can be suspended during postcopy live migration until the faulted
page is copied into the kernel. Downtime on the source side is the time
interval from when the source turns the vCPUs off until the destination
starts running them. That value was proper for precopy migration, where it
really shows the amount of time the vCPUs are down; but not for postcopy
migration, because several vCPU threads can still suspend after the vCPUs
were started. That is important for estimating packet drop for SDN software.

(V10 -> V11)
- rebase
- update documentation (comment from David)
- postcopy_notifier was removed from PostcopyBlocktimeContext (comment from
David)
- fix "since 2.10" for postcopy-vcpu-blocktime (comment from Eric)
- fix order in mark_postcopy_blocktime_begin/end (comment from David),
but I think it still has a slim race condition
- remove error_report from fill_destination_postcopy_migration_info (comment
from David)

(V9 -> V10)
- rebase
- patch "update kernel header for UFFD_FEATURE_*" has changed,
and was generated by  scripts/update-linux-headers.sh as David suggested. 


(V8 -> V9)
- rebase
- traces

(V7 -> V8)
- just one comma in
"migration: fix hardcoded function name in error report"
It was really missing, but is fixed in a further patch.

(V6 -> V7)
- copied bitmap was placed into RAMBlock as another migration
related bitmaps.
- Ordering of mark_postcopy_blocktime_end call and ordering
of checking copied bitmap were changed.
- linewrap style defects
- new patch "postcopy_place_page factoring out"
- postcopy_ram_supported_by_host accepts
MigrationIncomingState in qmp_migrate_set_capabilities
- minor fixes of documentation. 
and the huge description of get_postcopy_total_blocktime was
moved. David's comment.

(V5 -> V6)
- blocktime was added into hmp command. Comment from David.
- bitmap for copied pages was added as well as check in *_begin/_end
functions. Patch uses just introduced RAMBLOCK_FOREACH. Comment from David.
- description of receive_ufd_features/request_ufd_features. Comment from 
David.
- commit message headers/@since references were modified. Comment from Eric.
- also typos in documentation. Comment from Eric.
- style and description of field in MigrationInfo. Comment from Eric.
- ufd_check_and_apply (former ufd_version_check) is called twice,
so my previous patch contained a double allocation of the blocktime context
and, as a result, a memory leak. In this patch series it is fixed.

(V4 -> V5)
- the fill_destination_postcopy_migration_info empty stub was missing for
the non-Linux build

(V3 -> V4)
- get rid of Downtime as a name for vCPU waiting time during postcopy 
migration
- PostcopyBlocktimeContext renamed (it was just BlocktimeContext)
- atomic operations are used for dealing with fields of 
PostcopyBlocktimeContext
affected in both threads.
- hardcoded function names in error_report were replaced with %s and __line__
- this patch set includes the postcopy-downtime capability, but it is used
on the destination; coupled with the impossibility of returning the
calculated downtime back to the source to show it in query-migrate, it
looks like a big trade-off
- UFFD_API has to be sent whether or not we need to ask the kernel for a
feature, because the kernel expects it in any case (see patch comment)
- postcopy_downtime included into query-migrate output
- also this patch set includes trivial fix
migration: fix hardcoded function name in error report
maybe that is a candidate for qemu-trivial mailing list, but I already
sent "migration: Fixed code style" and it was unclaimed.

(V2 -> V3)
- Downtime calculation approach was changed, thanks to Peter Xu
- Due to the previous point there is no more need to keep a GTree or a
bitmap of cpus, so the glib changes aren't included in this patch set; they
could be resent in another patch set if there is a good reason for it.
- No procfs traces in this patch set; if somebody wants them, they can be
taken from the patchwork site to track down page fault initiators.
- UFFD_FEATURE_THREAD_ID is requested only when the kernel supports it
- It doesn't send back the downtime, just traces it

This patch set is based on commit
[PATCH v10 0/3] Add bitmap for received pages in postcopy migration

Both patch sets were rebased on 
commit d147f7e815f97cb477e223586bcb80c316ae10ea

Alexey Perevalov (6):
  migration: introduce postcopy-blocktime capability
  migration: add postcopy blocktime ctx into MigrationIncomingState
  migration: calculate vCPU blocktime on dst side
  migration: postcopy_blocktime documentation
  migration: add blocktime calculation into postcopy-test
  migration: add postcopy total blocktime into query-migrate

 docs/devel/migration.txt |  13 +++
 hmp.c|  15 +++
 migration/migration.c|  51 +

[Qemu-devel] [PATCH v11 4/6] migration: postcopy_blocktime documentation

2017-10-05 Thread Alexey Perevalov
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 docs/devel/migration.txt | 13 +
 1 file changed, 13 insertions(+)

diff --git a/docs/devel/migration.txt b/docs/devel/migration.txt
index 4030703..cebfe7a 100644
--- a/docs/devel/migration.txt
+++ b/docs/devel/migration.txt
@@ -402,6 +402,19 @@ will now cause the transition from precopy to postcopy.
 It can be issued immediately after migration is started or any
 time later on.  Issuing it after the end of a migration is harmless.
 
+Blocktime is a postcopy live migration metric, intended to show how
+long the vCPU was in the state of interruptible sleep due to a pagefault.
+That metric is calculated both for all vCPUs as an overlapped value, and
+separately for each vCPU. These values are calculated on the destination side.
+To enable postcopy blocktime calculation, enter the following command on the
+destination monitor:
+
+migrate_set_capability postcopy-blocktime on
+
+Postcopy blocktime can be retrieved by the query-migrate QMP command. The
+postcopy-blocktime value will show the overlapped blocking time for all
+vCPUs, and postcopy-vcpu-blocktime will show the list of blocking times per vCPU.
+
 Note: During the postcopy phase, the bandwidth limits set using
 migrate_set_speed is ignored (to avoid delaying requested pages that
 the destination is waiting for).
-- 
2.7.4




[Qemu-devel] [PATCH v11 1/6] migration: introduce postcopy-blocktime capability

2017-10-05 Thread Alexey Perevalov
Right now it can be used on the destination side to
enable vCPU blocktime calculation for postcopy live migration.
vCPU blocktime is the time from when the vCPU thread was put into
interruptible sleep until the memory page was copied and the thread woken.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/migration.c | 9 +
 migration/migration.h | 1 +
 qapi/migration.json   | 5 -
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index 98429dc..713f070 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1467,6 +1467,15 @@ bool migrate_zero_blocks(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
 }
 
+bool migrate_postcopy_blocktime(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
+}
+
 bool migrate_use_compression(void)
 {
 MigrationState *s;
diff --git a/migration/migration.h b/migration/migration.h
index b83ccea..c12ceba 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -193,6 +193,7 @@ int migrate_compress_level(void);
 int migrate_compress_threads(void);
 int migrate_decompress_threads(void);
 bool migrate_use_events(void);
+bool migrate_postcopy_blocktime(void);
 
 /* Sending on the return path - generic and then for each message type */
 void migrate_send_rp_shut(MigrationIncomingState *mis,
diff --git a/qapi/migration.json b/qapi/migration.json
index f8b365e..0f2af26 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -343,12 +343,15 @@
 #
 # @x-multifd: Use more than one fd for migration (since 2.11)
 #
+# @postcopy-blocktime: Calculate downtime for postcopy live migration
+# (since 2.11)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
-   'block', 'return-path', 'x-multifd' ] }
+   'block', 'return-path', 'x-multifd', 'postcopy-blocktime' ] }
 
 ##
 # @MigrationCapabilityStatus:
-- 
2.7.4




[Qemu-devel] [PATCH v10 3/3] migration: add bitmap for received page

2017-10-05 Thread Alexey Perevalov
This patch adds the ability to track down already received
pages; that's necessary for calculating vCPU block time in
the postcopy migration feature, and for recovery after
postcopy migration failure.

It's also necessary for solving the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will fail with an
EEXIST error (the struct page already exists after remap).

The bitmap is placed into RAMBlock like the other postcopy/precopy
related bitmaps.

Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 include/exec/ram_addr.h  | 10 ++
 migration/postcopy-ram.c | 17 -
 migration/ram.c  | 40 
 migration/ram.h  |  5 +
 4 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index d017639..6cbc02a 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -47,6 +47,8 @@ struct RAMBlock {
  * of the postcopy phase
  */
 unsigned long *unsentmap;
+/* bitmap of already received pages in postcopy */
+unsigned long *receivedmap;
 };
 
 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
@@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t 
offset)
 return (char *)block->host + offset;
 }
 
+static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
+RAMBlock *rb)
+{
+uint64_t host_addr_offset =
+(uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
+return host_addr_offset >> TARGET_PAGE_BITS;
+}
+
 long qemu_getrampagesize(void);
 unsigned long last_ram_page(void);
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 8bf6432..bec6c2c 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -642,22 +642,28 @@ int postcopy_ram_enable_notify(MigrationIncomingState 
*mis)
 }
 
 static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock 
*rb)
 {
+int ret;
 if (from_addr) {
 struct uffdio_copy copy_struct;
 copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
 copy_struct.src = (uint64_t)(uintptr_t)from_addr;
 copy_struct.len = pagesize;
 copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
 } else {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
 zero_struct.range.len = pagesize;
 zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set_range(rb, host_addr,
+   pagesize / qemu_target_page_size());
 }
+return ret;
 }
 
 /*
@@ -674,7 +680,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void 
*host, void *from,
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -696,7 +702,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, 
void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(),
+rb)) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
diff --git a/migration/ram.c b/migration/ram.c
index 304ac59..c30db15 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -44,6 +44,7 @@
 #include "qemu/error-report.h"
 #include "trace.h"
 #include "exec/ram_addr.h"
+#include "exec/target_page.h"
 #include "qemu/rcu_queue.h"
 #include "migration/colo.h"
 #include "migration/block.h"
@@ -148,6 +149,35 @@ out:
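
The hunk is truncated here. The helpers that ram.c adds around receivedmap
are small wrappers over the bitmap-offset computation shown in ram_addr.h
above; they plausibly look like this sketch (a reconstruction for
illustration, not the verbatim patch):

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb),
                   rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}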

[Qemu-devel] [PATCH v10 2/3] migration: introduce qemu_ufd_copy_ioctl helper

2017-10-05 Thread Alexey Perevalov
Just for placing auxiliary operations inside a helper:
auxiliary operations like tracking received pages, and
notifying about the copy operation in further patches.

Reviewed-by: Juan Quintela <quint...@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index d3073b9..8bf6432 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -641,6 +641,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 return 0;
 }
 
+static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
+void *from_addr, uint64_t pagesize)
+{
+if (from_addr) {
+struct uffdio_copy copy_struct;
+copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
+copy_struct.src = (uint64_t)(uintptr_t)from_addr;
+copy_struct.len = pagesize;
+copy_struct.mode = 0;
+return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+} else {
+struct uffdio_zeropage zero_struct;
+zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
+zero_struct.range.len = pagesize;
+zero_struct.mode = 0;
+return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+}
+
 /*
  * Place a host page (from) at (host) atomically
  * returns 0 on success
@@ -648,20 +667,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState 
*mis)
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 RAMBlock *rb)
 {
-struct uffdio_copy copy_struct;
 size_t pagesize = qemu_ram_pagesize(rb);
 
-copy_struct.dst = (uint64_t)(uintptr_t)host;
-copy_struct.src = (uint64_t)(uintptr_t)from;
-copy_struct.len = pagesize;
-copy_struct.mode = 0;
-
 /* copy also acks to the kernel waking the stalled thread up
  * TODO: We can inhibit that ack and only do it if it was requested
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -683,12 +696,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, 
void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-struct uffdio_zeropage zero_struct;
-zero_struct.range.start = (uint64_t)(uintptr_t)host;
-zero_struct.range.len = getpagesize();
-zero_struct.mode = 0;
-
-if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
-- 
2.7.4




[Qemu-devel] [PATCH v10 0/3] Add bitmap for received pages in postcopy migration

2017-10-05 Thread Alexey Perevalov
This is the 10th version of
[PATCH v1 0/2] Add bitmap for copied pages in postcopy migration;
the cover message is from there.

This is a separate patch set; it is derived from
https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html

There are several possible use cases:
1. solving the issue with postcopy live migration and shared memory.
OVS-VSWITCH requires information about copied pages, to fallocate
newly allocated pages.
2. calculating vCPU blocktime;
for more details see
https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html
3. recovery after a failure in the middle of postcopy migration


V9 -> V10
- ramblock_recv_bitmap_clear was removed from the patch set, because
I didn't find any usage of it in existing patch sets based on this patch.

V8 -> V9
- the patch "migration: fix incorrect postcopy recved_bitmap" from the
"[RFC 00/29] Migration: postcopy failure recovery" patch set was squashed
into the latest patch of this patch set; getpagesize was replaced with
qemu_target_page_size, as David suggested.
- for the sake of API uniformity the semantics of all functions were
changed: now RAMBlock *rb is the first argument, as in the bitmap API.
- Also the define TARGET_PAGE_BITS was replaced with qemu_target_page_bits
in all other places of this patch set, for uniformity and maintenance.

V7 -> V8
- removed unnecessary renaming and moving of the block variable to ram_load's
function scope
- ramblock_recv_map_init became a static function

V6 -> V7
- rebased on
[PATCH v7 0/5] Create setup/cleanup methods for migration incoming side
- the lifetime of the received map was changed
(ram_load_setup/ram_load_cleanup)

V5 -> V6
- call ramblock_recv_map_init from migration_fd_process_incoming (Peter
suggested), but finalization is still in ram_load_cleanup, as Juan suggested.

V4 -> V5
- remove ramblock_recv_bitmap_clear_range in favor of bitmap_clear (comment
from David)
- a single invocation place for ramblock_recv_bitmap_set (comment from Peter)
- minor changes like removing a comment from qemu_ufd_copy_ioctl and a local
variable from ramblock_recv_map_init (comment from Peter)

V3 -> V4
- clear_bit instead of ramblock_recv_bitmap_clear in
ramblock_recv_bitmap_clear_range,
it reduces the number of operations (comment from Juan)
- for postcopy, ramblock_recv_bitmap_set is called after the page was copied,
only in case of success (comment from David)
- indentation fixes (comment from Juan)

V2 -> V3
- the ramblock_recv_map_init call is placed into migration_incoming_get_current;
it looks like the general place for both the precopy and postcopy cases.
- releasing of the received bitmap memory is placed into ram_load_cleanup;
unfortunately, it is called only in the precopy case.
- precopy case and discard ram block case
- function renaming, and other minor cleanups

V1 -> V2
- change in terminology: s/copied/received/g
- granularity became TARGET_PAGE_SIZE, rather than the actual page size of the
ramblock
- movecopiedmap & get_copiedmap_size were removed, until a patch set where
they will be necessary
- releasing the memory of receivedmap was added into ram_load_cleanup
- new patch "migration: introduce qemu_ufd_copy_ioctl helper"

Patchset is based on:
commit d147f7e815f97cb477e223586bcb80c316ae10ea
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

Alexey Perevalov (3):
  migration: postcopy_place_page factoring out
  migration: introduce qemu_ufd_copy_ioctl helper
  migration: add bitmap for received page

 include/exec/ram_addr.h  | 10 +
 migration/postcopy-ram.c | 54 +++-
 migration/postcopy-ram.h |  4 ++--
 migration/ram.c  | 44 +--
 migration/ram.h  |  5 +
 5 files changed, 94 insertions(+), 23 deletions(-)

-- 
2.7.4




[Qemu-devel] [PATCH v10 1/3] migration: postcopy_place_page factoring out

2017-10-05 Thread Alexey Perevalov
We need to mark copied pages as close as possible to the place where they
are tracked down. That will be necessary in a further patch.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Peter Xu <pet...@redhat.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 13 +++--
 migration/postcopy-ram.h |  4 ++--
 migration/ram.c  |  4 ++--
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 0de68e8..d3073b9 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -646,9 +646,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
  * returns 0 on success
  */
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize)
+RAMBlock *rb)
 {
 struct uffdio_copy copy_struct;
+size_t pagesize = qemu_ram_pagesize(rb);
 
 copy_struct.dst = (uint64_t)(uintptr_t)host;
 copy_struct.src = (uint64_t)(uintptr_t)from;
@@ -677,11 +678,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void 
*host, void *from,
  * returns 0 on success
  */
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize)
+ RAMBlock *rb)
 {
 trace_postcopy_place_page_zero(host);
 
-if (pagesize == getpagesize()) {
+if (qemu_ram_pagesize(rb) == getpagesize()) {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host;
 zero_struct.range.len = getpagesize();
@@ -711,7 +712,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, 
void *host,
 memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
 }
 return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
-   pagesize);
+   rb);
 }
 
 return 0;
@@ -774,14 +775,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState 
*mis)
 }
 
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize)
+RAMBlock *rb)
 {
 assert(0);
 return -1;
 }
 
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
-size_t pagesize)
+RAMBlock *rb)
 {
 assert(0);
 return -1;
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 587a8b8..77ea0fd 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms,
  * returns 0 on success
  */
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize);
+RAMBlock *rb);
 
 /*
  * Place a zero page at (host) atomically
  * returns 0 on success
  */
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize);
+ RAMBlock *rb);
 
 /* The current postcopy state is read/set by postcopy_state_get/set
  * which update it atomically.
diff --git a/migration/ram.c b/migration/ram.c
index b83f897..304ac59 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2680,10 +2680,10 @@ static int ram_load_postcopy(QEMUFile *f)
 
 if (all_zero) {
 ret = postcopy_place_page_zero(mis, place_dest,
-   block->page_size);
+   block);
 } else {
 ret = postcopy_place_page(mis, place_dest,
-  place_source, block->page_size);
+  place_source, block);
 }
 }
 if (!ret) {
-- 
2.7.4




Re: [Qemu-devel] [PATCH v10 07/10] migration: calculate vCPU blocktime on dst side

2017-09-28 Thread Alexey Perevalov

On 09/21/2017 02:57 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

This patch provides blocktime calculation per vCPU,
as a summary and as an overlapped value for all vCPUs.

This approach was suggested by Peter Xu, as an improvement over the
previous approach where QEMU kept a tree with the faulted page address and
a cpus bitmask in it. Now QEMU keeps an array with the faulted page address
as value and the vCPU as index. It helps to find the proper vCPU at
UFFD_COPY time. It also keeps a list of blocktimes per vCPU (which can be
traced with page_fault_addr).

Blocktime will not be calculated if the postcopy_blocktime field of
MigrationIncomingState wasn't initialized.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
  migration/postcopy-ram.c | 138 ++-
  migration/trace-events   |   5 +-
  2 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index cc78981..9a5133f 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -110,7 +110,6 @@ static struct PostcopyBlocktimeContext 
*blocktime_context_new(void)
  
  ctx->exit_notifier.notify = migration_exit_cb;

  qemu_add_exit_notifier(&ctx->exit_notifier);
-add_migration_state_change_notifier(&ctx->postcopy_notifier);
  return ctx;
  }
  
@@ -559,6 +558,136 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,

  return 0;
  }
  
+static int get_mem_fault_cpu_index(uint32_t pid)

+{
+CPUState *cpu_iter;
+
+CPU_FOREACH(cpu_iter) {
+if (cpu_iter->thread_id == pid) {
+trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid);
+return cpu_iter->cpu_index;
+}
+}
+trace_get_mem_fault_cpu_index(-1, pid);
+return -1;
+}
+
+/*
+ * This function is being called when pagefault occurs. It
+ * tracks down vCPU blocking time.
+ *
+ * @addr: faulted host virtual address
+ * @ptid: faulted process thread id
+ * @rb: ramblock appropriate to addr
+ */
+static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid,
+  RAMBlock *rb)
+{
+int cpu, already_received;
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int64_t now_ms;
+
+if (!dc || ptid == 0) {
+return;
+}
+cpu = get_mem_fault_cpu_index(ptid);
+if (cpu < 0) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+if (dc->vcpu_addr[cpu] == 0) {
+atomic_inc(&dc->smp_cpus_down);
+}
+
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+atomic_xchg__nocheck(&dc->last_begin, now_ms);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
+
+already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
+if (already_received) {
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], 0);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], 0);
+atomic_sub(&dc->smp_cpus_down, 1);
+}
+trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
+cpu, already_received);
+}
+
+/*
+ *  This function just provides the calculated blocktime per cpu and traces it.
+ *  Total blocktime is calculated in mark_postcopy_blocktime_end.
+ *
+ *
+ * Assume we have 3 CPUs
+ *
+ *      S1        E1           S1               E1
+ * -----***********------------xxx***************------------------------> CPU1
+ *
+ *             S2                E2
+ * ------------****************xxx---------------------------------------> CPU2
+ *
+ *                         S3            E3
+ * ------------------------****xxx********-------------------------------> CPU3
+ *
+ * We have the sequence S1,S2,E1,S3,S1,E2,E3,E1
+ * S2,E1 - doesn't match the condition, because the sequence S1,S2,E1 doesn't
+ *include CPU3
+ * S3,S1,E2 - the sequence includes all CPUs, in this case the overlap will be
+ *S1,E2 - it's a part of the total blocktime.
+ * S1 - here is last_begin
+ * The legend of the picture is the following:
+ *  * - means blocktime per vCPU
+ *  x - means overlapped blocktime (total blocktime)
+ *
+ * @addr: host virtual address
+ */
+static void mark_postcopy_blocktime_end(uint64_t addr)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int i, affected_cpu = 0;
+int64_t now_ms;
+bool vcpu_total_blocktime = false;
+
+if (!dc) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+/* lookup the cpu, to clear it;
+ * that algorithm looks straightforward, but it's not
+ * optimal: a more optimal algorithm would keep a tree or hash
+ * where the key is an address and the value is a list of  */
+for (i = 0; i < smp_cpus; i++) {
+uint64_t vcpu_blocktime = 0;
+   

[Qemu-devel] [PATCH] linux-headers: sync against v4.14-rc1

2017-09-22 Thread Alexey Perevalov
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 include/standard-headers/asm-x86/hyperv.h| 19 ++---
 include/standard-headers/linux/pci_regs.h| 42 
 include/standard-headers/linux/virtio_ring.h |  4 +--
 linux-headers/asm-s390/kvm.h |  6 
 linux-headers/linux/kvm.h|  3 +-
 linux-headers/linux/userfaultfd.h| 16 ++-
 6 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/include/standard-headers/asm-x86/hyperv.h 
b/include/standard-headers/asm-x86/hyperv.h
index fac7651..5f95d5e 100644
--- a/include/standard-headers/asm-x86/hyperv.h
+++ b/include/standard-headers/asm-x86/hyperv.h
@@ -149,12 +149,9 @@
  */
 #define HV_X64_DEPRECATING_AEOI_RECOMMENDED    (1 << 9)
 
-/*
- * HV_VP_SET available
- */
+/* Recommend using the newer ExProcessorMasks interface */
 #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED  (1 << 11)
 
-
 /*
  * Crash notification flag.
  */
@@ -242,7 +239,11 @@
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
 
 /* Declare the various hypercall operations. */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST  0x0003
 #define HVCALL_NOTIFY_LONG_SPIN_WAIT   0x0008
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX  0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX   0x0014
 #define HVCALL_POST_MESSAGE    0x005c
 #define HVCALL_SIGNAL_EVENT    0x005d
 
@@ -259,6 +260,16 @@
 #define HV_PROCESSOR_POWER_STATE_C2    2
 #define HV_PROCESSOR_POWER_STATE_C3    3
 
+#define HV_FLUSH_ALL_PROCESSORS    BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES    BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY  BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+enum HV_GENERIC_SET_FORMAT {
+   HV_GENERIC_SET_SPARCE_4K,
+   HV_GENERIC_SET_ALL,
+};
+
 /* hypercall status code */
 #define HV_STATUS_SUCCESS  0
 #define HV_STATUS_INVALID_HYPERCALL_CODE   2
diff --git a/include/standard-headers/linux/pci_regs.h 
b/include/standard-headers/linux/pci_regs.h
index c22d3eb..f8d5804 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -513,6 +513,7 @@
 #define  PCI_EXP_DEVSTA_URD    0x0008  /* Unsupported Request Detected */
 #define  PCI_EXP_DEVSTA_AUXPD  0x0010  /* AUX Power Detected */
 #define  PCI_EXP_DEVSTA_TRPND  0x0020  /* Transactions Pending */
+#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1  12  /* v1 endpoints without link end here */
 #define PCI_EXP_LNKCAP 12  /* Link Capabilities */
 #define  PCI_EXP_LNKCAP_SLS0x000f /* Supported Link Speeds */
 #define  PCI_EXP_LNKCAP_SLS_2_5GB 0x0001 /* LNKCAP2 SLS Vector bit 0 */
@@ -556,7 +557,7 @@
 #define  PCI_EXP_LNKSTA_DLLLA  0x2000  /* Data Link Layer Link Active */
 #define  PCI_EXP_LNKSTA_LBMS   0x4000  /* Link Bandwidth Management Status */
 #define  PCI_EXP_LNKSTA_LABS   0x8000  /* Link Autonomous Bandwidth Status */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20  /* v1 endpoints end here */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20  /* v1 endpoints with link end here */
 #define PCI_EXP_SLTCAP 20  /* Slot Capabilities */
 #define  PCI_EXP_SLTCAP_ABP0x0001 /* Attention Button Present */
 #define  PCI_EXP_SLTCAP_PCP0x0002 /* Power Controller Present */
@@ -639,7 +640,7 @@
 #define  PCI_EXP_DEVCTL2_OBFF_MSGB_EN  0x4000  /* Enable OBFF Message type B */
 #define  PCI_EXP_DEVCTL2_OBFF_WAKE_EN  0x6000  /* OBFF using WAKE# signaling */
 #define PCI_EXP_DEVSTA2    42  /* Device Status 2 */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 44  /* v2 endpoints end here */
+#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2  44  /* v2 endpoints without link end here */
 #define PCI_EXP_LNKCAP2    44  /* Link Capabilities 2 */
 #define  PCI_EXP_LNKCAP2_SLS_2_5GB 0x0002 /* Supported Speed 2.5GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_5_0GB 0x0004 /* Supported Speed 5.0GT/s */
@@ -647,6 +648,7 @@
 #define  PCI_EXP_LNKCAP2_CROSSLINK 0x0100 /* Crosslink supported */
 #define PCI_EXP_LNKCTL2    48  /* Link Control 2 */
 #define PCI_EXP_LNKSTA2    50  /* Link Status 2 */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52  /* v2 endpoints with link end here */
 #define PCI_EXP_SLTCAP2    52  /* Slot Capabilities 2 */
 #define PCI_EXP_SLTCTL2    56  /* Slot Control 2 */
 #define PCI_EXP_SLTSTA2    58  /* Slot Status 2 */
@@ -733,23 +735,17 @@
 #define  PCI_ERR_CAP_ECRC_CHKE 0x0100  /* ECRC Check Enable */
 #define PCI_ERR_HEADER_LOG 28  /* Header Log Register (16 bytes) */
 #define PCI_ERR_ROOT_COMMAND   44  /* Root Error Command */
-/* Correctable Err Reporting Enable */
-#define

[Qemu-devel] [PATCH] linux-headers: sync against v4.14-rc1

2017-09-22 Thread Alexey Perevalov
This patch contains the modification of userfaultfd.h
necessary for the series
"calculate blocktime for postcopy live migration";
it was decided to send it separately from the other
modifications.

Build was tested with docker, but it's not fully tested
at runtime.
Based on a664607440511fdf8cff9d1c2afefbdbca1d1295
"Merge remote-tracking branch 
'remotes/famz/tags/build-and-test-automation-pull-request' into staging"


Alexey Perevalov (1):
  linux-headers: sync against v4.14-rc1

 include/standard-headers/asm-x86/hyperv.h| 19 ++---
 include/standard-headers/linux/pci_regs.h| 42 
 include/standard-headers/linux/virtio_ring.h |  4 +--
 linux-headers/asm-s390/kvm.h |  6 
 linux-headers/linux/kvm.h|  3 +-
 linux-headers/linux/userfaultfd.h| 16 ++-
 6 files changed, 64 insertions(+), 26 deletions(-)

-- 
1.9.1




Re: [Qemu-devel] [PATCH v10 07/10] migration: calculate vCPU blocktime on dst side

2017-09-22 Thread Alexey Perevalov

On 09/21/2017 02:57 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

This patch provides blocktime calculation per vCPU,
as a summary and as an overlapped value for all vCPUs.

This approach was suggested by Peter Xu, as an improvement of the
previous approach where QEMU kept a tree with the faulted page address and a cpu bitmask
in it. Now QEMU keeps an array with the faulted page address as value and the vCPU
as index. It helps to find the proper vCPU at UFFD_COPY time. It also keeps
a list of blocktime per vCPU (which can be traced with page_fault_addr).

Blocktime will not be calculated if the postcopy_blocktime field of
MigrationIncomingState wasn't initialized.

Signed-off-by: Alexey Perevalov<a.pereva...@samsung.com>
---
  migration/postcopy-ram.c | 138 ++-
  migration/trace-events   |   5 +-
  2 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index cc78981..9a5133f 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -110,7 +110,6 @@ static struct PostcopyBlocktimeContext *blocktime_context_new(void)
  
  ctx->exit_notifier.notify = migration_exit_cb;

  qemu_add_exit_notifier(&ctx->exit_notifier);
-add_migration_state_change_notifier(&ctx->postcopy_notifier);
  return ctx;
  }
  
@@ -559,6 +558,136 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,

  return 0;
  }
  
+static int get_mem_fault_cpu_index(uint32_t pid)

+{
+CPUState *cpu_iter;
+
+CPU_FOREACH(cpu_iter) {
+if (cpu_iter->thread_id == pid) {
+trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid);
+return cpu_iter->cpu_index;
+}
+}
+trace_get_mem_fault_cpu_index(-1, pid);
+return -1;
+}
+
+/*
+ * This function is being called when pagefault occurs. It
+ * tracks down vCPU blocking time.
+ *
+ * @addr: faulted host virtual address
+ * @ptid: faulted process thread id
+ * @rb: ramblock appropriate to addr
+ */
+static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid,
+  RAMBlock *rb)
+{
+int cpu, already_received;
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int64_t now_ms;
+
+if (!dc || ptid == 0) {
+return;
+}
+cpu = get_mem_fault_cpu_index(ptid);
+if (cpu < 0) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+if (dc->vcpu_addr[cpu] == 0) {
+atomic_inc(&dc->smp_cpus_down);
+}
+
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+atomic_xchg__nocheck(&dc->last_begin, now_ms);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
+
+already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
+if (already_received) {
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], 0);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], 0);
+atomic_sub(&dc->smp_cpus_down, 1);
+}
+trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
+cpu, already_received);
+}
+
+/*
+ *  This function just provides the calculated blocktime per cpu and traces it.
+ *  Total blocktime is calculated in mark_postcopy_blocktime_end.
+ *
+ *
+ * Assume we have 3 CPUs
+ *
+ *  S1E1   S1   E1
+ * -***xxx***> CPU1
+ *
+ * S2E2
+ * xxx---> CPU2
+ *
+ * S3E3
+ * xxx---> CPU3
+ *
+ * We have sequence S1,S2,E1,S3,S1,E2,E3,E1
+ * S2,E1 - doesn't match the condition, because sequence S1,S2,E1 doesn't include CPU3
+ * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 -
+ *it's a part of total blocktime.
+ * S1 - here is last_begin
+ * Legend of the picture is following:
+ *  * - means blocktime per vCPU
+ *  x - means overlapped blocktime (total blocktime)
+ *
+ * @addr: host virtual address
+ */
+static void mark_postcopy_blocktime_end(uint64_t addr)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int i, affected_cpu = 0;
+int64_t now_ms;
+bool vcpu_total_blocktime = false;
+
+if (!dc) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+/* lookup cpu, to clear it,
+ * that algorithm looks straightforward, but it's not
+ * optimal; a more optimal algorithm would keep a tree or hash
+ * where the key is the address and the value is a list of  */
+for (i = 0; i < smp_cpus; i++) {
+uint64_t vcpu_blocktime = 0;
+   

Re: [Qemu-devel] [PATCH v10 10/10] migration: add postcopy total blocktime into query-migrate

2017-09-21 Thread Alexey Perevalov

On 09/21/2017 03:42 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

Postcopy total blocktime is available on destination side only.
But query-migrate was possible only for source. This patch
adds ability to call query-migrate on destination.
To be able to see postcopy blocktime, need to request postcopy-blocktime
capability.

The query-migrate command will show following sample result:
{"return":
 "postcopy-vcpu-blocktime": [115, 100],
 "status": "completed",
 "postcopy-blocktime": 100
}}

postcopy_vcpu_blocktime contains a list, where the first item corresponds to
the first vCPU in QEMU.

This patch has a drawback: it combines the states of incoming and
outgoing migration. An ongoing migration state will overwrite the incoming
state. It looks better to separate query-migrate for incoming and
outgoing migration, or to add a parameter indicating the type of migration.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
  hmp.c| 15 +
  migration/migration.c| 42 +++
  migration/migration.h|  4 
  migration/postcopy-ram.c | 57 
  migration/trace-events   |  1 +
  qapi/migration.json  | 11 +-
  6 files changed, 125 insertions(+), 5 deletions(-)

diff --git a/hmp.c b/hmp.c
index 0fb2bc7..142f76e 100644
--- a/hmp.c
+++ b/hmp.c
@@ -264,6 +264,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 info->cpu_throttle_percentage);
  }
  
+if (info->has_postcopy_blocktime) {

+monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+   info->postcopy_blocktime);
+}
+
+if (info->has_postcopy_vcpu_blocktime) {
+Visitor *v;
+char *str;
+v = string_output_visitor_new(false, &str);
+visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_complete(v, &str);
+monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
+g_free(str);
+visit_free(v);
+}
  qapi_free_MigrationInfo(info);
  qapi_free_MigrationCapabilityStatusList(caps);
  }
diff --git a/migration/migration.c b/migration/migration.c
index 4f029e8..e1d3248 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -528,14 +528,15 @@ static void populate_disk_info(MigrationInfo *info)
  }
  }
  
-MigrationInfo *qmp_query_migrate(Error **errp)

+static void fill_source_migration_info(MigrationInfo *info)
  {
-MigrationInfo *info = g_malloc0(sizeof(*info));
  MigrationState *s = migrate_get_current();
  
  switch (s->state) {

  case MIGRATION_STATUS_NONE:
  /* no migration has happened ever */
+/* do not overwrite destination migration status */
+return;
  break;
  case MIGRATION_STATUS_SETUP:
  info->has_status = true;
@@ -584,8 +585,6 @@ MigrationInfo *qmp_query_migrate(Error **errp)
  break;
  }
  info->status = s->state;
-
-return info;
  }
  
  /**

@@ -649,6 +648,41 @@ static bool migrate_caps_check(bool *cap_list,
  return true;
  }
  
+static void fill_destination_migration_info(MigrationInfo *info)

+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+
+switch (mis->state) {
+case MIGRATION_STATUS_NONE:
+return;
+break;
+case MIGRATION_STATUS_SETUP:
+case MIGRATION_STATUS_CANCELLING:
+case MIGRATION_STATUS_CANCELLED:
+case MIGRATION_STATUS_ACTIVE:
+case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+case MIGRATION_STATUS_FAILED:
+case MIGRATION_STATUS_COLO:
+info->has_status = true;
+break;
+case MIGRATION_STATUS_COMPLETED:
+info->has_status = true;
+fill_destination_postcopy_migration_info(info);
+break;
+}
+info->status = mis->state;
+}
+
+MigrationInfo *qmp_query_migrate(Error **errp)
+{
+MigrationInfo *info = g_malloc0(sizeof(*info));
+
+fill_destination_migration_info(info);
+fill_source_migration_info(info);
+
+return info;
+}
+
  void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
Error **errp)
  {
diff --git a/migration/migration.h b/migration/migration.h
index 770466b..882a59b 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -70,6 +70,10 @@ struct MigrationIncomingState {
  
  MigrationIncomingState *migration_incoming_get_current(void);

  void migration_incoming_state_destroy(void);
+/*
+ * Functions to work with blocktime context
+ */
+void fill_destination_postcopy_migration_info(MigrationInfo *info);
  
  #define TYPE_MIGRATION "migration"
  
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c

index 9

Re: [Qemu-devel] [PATCH v10 08/10] migration: postcopy_blocktime documentation

2017-09-21 Thread Alexey Perevalov

On 09/21/2017 03:33 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>

Although it does have my R-b it might be worth adding some clarification
that it's a measure of when *all* cpus are blocked and so isn't a
total measure of impact of postcopy (when blocking some of them).

yes, maybe additional clarification is necessary.
now we have both values:
{"return": {"postcopy-blocktime": 5691, "status": "completed", 
"postcopy-vcpu-blocktime": [7671, 6388]}}
where postcopy-blocktime is for *all* and postcopy-vcpu-blocktime is per 
vCPU,

it's really worth describing it, like:

Blocktime is a postcopy live migration metric, intended to show
how long the vCPU was in a state of interruptible sleep due to a pagefault.
That metric is calculated both for all vCPUs as an overlapped value, and
separately for each vCPU. These values are calculated on the destination side.
To enable postcopy blocktime calculation, enter the following command on the
destination monitor:

migrate_set_capability postcopy-blocktime on

Postcopy blocktime can be retrieved by the query-migrate qmp command.
The postcopy-blocktime value will show the overlapped blocking time for
all vCPUs,
and postcopy-vcpu-blocktime will show the list of blocking times per vCPU.
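To make the overlap rule concrete, here is a minimal self-contained sketch
(not the QEMU code; the two-vCPU fault trace and millisecond values are
invented) that replays begin/end events and accumulates per-vCPU blocktime
plus the overlapped total in the same spirit as
mark_postcopy_blocktime_begin/_end:

#include <stdio.h>
#include <stdint.h>

#define NCPUS 2

static int64_t fault_start[NCPUS];  /* 0 = vCPU currently not blocked */
static int64_t per_vcpu[NCPUS];     /* accumulated blocktime per vCPU */
static int64_t total;               /* overlapped: all vCPUs down */
static int64_t last_begin;
static int ncpus_down;

static void fault_begin(int cpu, int64_t now)
{
    if (!fault_start[cpu]) {
        ncpus_down++;
    }
    fault_start[cpu] = now;
    last_begin = now;
}

static void fault_end(int cpu, int64_t now)
{
    if (!fault_start[cpu]) {
        return;
    }
    per_vcpu[cpu] += now - fault_start[cpu];
    if (ncpus_down == NCPUS) {
        /* the interval [last_begin, now] had every vCPU blocked */
        total += now - last_begin;
    }
    fault_start[cpu] = 0;
    ncpus_down--;
}

int main(void)
{
    fault_begin(0, 100);
    fault_begin(1, 120);          /* from t=120 both vCPUs are blocked */
    fault_end(0, 150);            /* overlap 120..150 -> total += 30 */
    fault_end(1, 160);
    printf("total=%d vcpu0=%d vcpu1=%d\n",
           (int)total, (int)per_vcpu[0], (int)per_vcpu[1]);
    return 0;
}

Running the sketch prints total=30 vcpu0=50 vcpu1=40, matching the picture:
the total only grows while every vCPU is inside a fault.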


--
Best regards,
Alexey Perevalov



Dave


---
  docs/devel/migration.txt | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/docs/devel/migration.txt b/docs/devel/migration.txt
index 1b940a8..4b625ca 100644
--- a/docs/devel/migration.txt
+++ b/docs/devel/migration.txt
@@ -402,6 +402,16 @@ will now cause the transition from precopy to postcopy.
  It can be issued immediately after migration is started or any
  time later on.  Issuing it after the end of a migration is harmless.
  
+Blocktime is a postcopy live migration metric, intended to show

+how long the vCPU was in a state of interruptible sleep due to a pagefault.
+This value is calculated on the destination side.
+To enable postcopy blocktime calculation, enter the following command on the
+destination monitor:
+
+migrate_set_capability postcopy-blocktime on
+
+Postcopy blocktime can be retrieved by the query-migrate qmp command.
+
  Note: During the postcopy phase, the bandwidth limits set using
  migrate_set_speed is ignored (to avoid delaying requested pages that
  the destination is waiting for).
--
1.9.1


--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK









Re: [Qemu-devel] [PATCH v10 06/10] migration: add postcopy blocktime ctx into MigrationIncomingState

2017-09-21 Thread Alexey Perevalov

On 09/21/2017 01:16 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

This patch adds a request to kernel space for UFFD_FEATURE_THREAD_ID,
in case this feature is provided by the kernel.

PostcopyBlocktimeContext is encapsulated inside postcopy-ram.c,
since it is a postcopy-only feature.
It also defines the PostcopyBlocktimeContext instance's lifetime.
Information from the PostcopyBlocktimeContext instance will be provided
long after postcopy migration ends; the instance of PostcopyBlocktimeContext
will live till QEMU exit, but the parts of it (vcpu_addr,
page_fault_vcpu_time) used only during calculation will be released
when postcopy ends or fails.

To enable postcopy blocktime calculation on the destination, the proper
capability needs to be requested (the patch for documentation is at the
tail of the patch set).

As an example, the following command enables that capability, assuming QEMU
was started with
-chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock
option to control it

[root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \
{\"execute\": \"migrate-set-capabilities\" , \"arguments\":   {
\"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\":
true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock

Or just with HMP
(qemu) migrate_set_capability postcopy-blocktime on

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
  migration/migration.h|  8 ++
  migration/postcopy-ram.c | 65 
  2 files changed, 73 insertions(+)

diff --git a/migration/migration.h b/migration/migration.h
index 56bf33c..770466b 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -21,6 +21,8 @@
  #include "qemu/coroutine_int.h"
  #include "hw/qdev.h"
  
+struct PostcopyBlocktimeContext;

+
  /* State for the incoming migration */
  struct MigrationIncomingState {
  QEMUFile *from_src_file;
@@ -58,6 +60,12 @@ struct MigrationIncomingState {
  /* The coroutine we should enter (back) after failover */
  Coroutine *migration_incoming_co;
  QemuSemaphore colo_incoming_sem;
+
+/*
+ * PostcopyBlocktimeContext to keep information for postcopy
+ * live migration, to calculate vCPU block time
+ * */
+struct PostcopyBlocktimeContext *blocktime_ctx;
  };
  
  MigrationIncomingState *migration_incoming_get_current(void);

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index bec6c2c..cc78981 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -61,6 +61,58 @@ struct PostcopyDiscardState {
  #include <sys/eventfd.h>
  #include <linux/userfaultfd.h>
  
+typedef struct PostcopyBlocktimeContext {

+/* time when page fault initiated per vCPU */
+int64_t *page_fault_vcpu_time;
+/* page address per vCPU */
+uint64_t *vcpu_addr;
+int64_t total_blocktime;
+/* blocktime per vCPU */
+int64_t *vcpu_blocktime;
+/* point in time when last page fault was initiated */
+int64_t last_begin;
+/* number of vCPUs suspended */
+int smp_cpus_down;
+
+/*
+ * Handler for exit event, necessary for
+ * releasing whole blocktime_ctx
+ */
+Notifier exit_notifier;
+/*
+ * Handler for postcopy event, necessary for
+ * releasing unnecessary part of blocktime_ctx
+ */
+Notifier postcopy_notifier;

Is this actually used? It's just that...


+} PostcopyBlocktimeContext;
+
+static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx)
+{
+g_free(ctx->page_fault_vcpu_time);
+g_free(ctx->vcpu_addr);
+g_free(ctx->vcpu_blocktime);
+g_free(ctx);
+}
+
+static void migration_exit_cb(Notifier *n, void *data)
+{
+PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext,
+ exit_notifier);
+destroy_blocktime_context(ctx);
+}
+
+static struct PostcopyBlocktimeContext *blocktime_context_new(void)
+{
+PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
+ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->vcpu_addr = g_new0(uint64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+
+ctx->exit_notifier.notify = migration_exit_cb;
+qemu_add_exit_notifier(&ctx->exit_notifier);
+add_migration_state_change_notifier(&ctx->postcopy_notifier);

Patch 7 removes that line, and I don't see what puts it back;
and this line doesn't actually set up ctx->postcopy_notifier.

Other than that, it looks OK.

Thank you, I really changed my mind, and decided to keep the
blocktime context (and all calculated values) till the
stop of the VM, not till the end of migration.

--
Best regards,
Alexey Perevalov


Dave


+return ctx;
+}
  
  /**

   * receive_ufd_features: check userfault fd features, to request only supported
@@ -153,6 +205,19 @@ static bool uf

Re: [Qemu-devel] [PATCH v10 01/10] userfault: update kernel header for UFFD_FEATURE_*

2017-09-21 Thread Alexey Perevalov

On 09/20/2017 09:43 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

This commit adds modification for UFFD_FEATURE_SIGBUS and
UFFD_FEATURE_THREAD_ID.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>

This should be replaced with just running the
scripts/update-linux-headers.sh
against a 4.14-rc1 checkout.

That can be done as a separate patch or the first patch
of this series.

Ok, in case of a separate patch it's reasonable to
send the modification for all headers.


--
Best regards,
Alexey Perevalov



Dave


---
  linux-headers/linux/userfaultfd.h | 16 +++-
  1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/linux-headers/linux/userfaultfd.h 
b/linux-headers/linux/userfaultfd.h
index 9701772..b43cf0d 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -23,7 +23,9 @@
   UFFD_FEATURE_EVENT_REMOVE |  \
   UFFD_FEATURE_EVENT_UNMAP |   \
   UFFD_FEATURE_MISSING_HUGETLBFS | \
-  UFFD_FEATURE_MISSING_SHMEM)
+  UFFD_FEATURE_MISSING_SHMEM | \
+  UFFD_FEATURE_SIGBUS |\
+  UFFD_FEATURE_THREAD_ID)
  #define UFFD_API_IOCTLS   \
((__u64)1 << _UFFDIO_REGISTER |   \
 (__u64)1 << _UFFDIO_UNREGISTER | \
@@ -78,6 +80,9 @@ struct uffd_msg {
struct {
__u64   flags;
__u64   address;
+   union {
+   __u32 ptid;
+   } feat;
} pagefault;
  
  		struct {

@@ -153,6 +158,13 @@ struct uffdio_api {
 * UFFD_FEATURE_MISSING_SHMEM works the same as
 * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
 * (i.e. tmpfs and other shmem based APIs).
+*
+* UFFD_FEATURE_SIGBUS feature means no page-fault
+* (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
+* a SIGBUS signal will be sent to the faulting process.
+*
+* UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
+* be returned, if feature is not requested 0 will be returned.
 */
  #define UFFD_FEATURE_PAGEFAULT_FLAG_WP    (1<<0)
  #define UFFD_FEATURE_EVENT_FORK   (1<<1)
@@ -161,6 +173,8 @@ struct uffdio_api {
  #define UFFD_FEATURE_MISSING_HUGETLBFS    (1<<4)
  #define UFFD_FEATURE_MISSING_SHMEM        (1<<5)
  #define UFFD_FEATURE_EVENT_UNMAP  (1<<6)
+#define UFFD_FEATURE_SIGBUS    (1<<7)
+#define UFFD_FEATURE_THREAD_ID (1<<8)
__u64 features;
  
  	__u64 ioctls;

--
1.9.1


--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK







[Qemu-devel] [PATCH v10 10/10] migration: add postcopy total blocktime into query-migrate

2017-09-19 Thread Alexey Perevalov
Postcopy total blocktime is available on destination side only.
But query-migrate was possible only for source. This patch
adds ability to call query-migrate on destination.
To be able to see postcopy blocktime, need to request postcopy-blocktime
capability.

The query-migrate command will show following sample result:
{"return":
"postcopy-vcpu-blocktime": [115, 100],
"status": "completed",
"postcopy-blocktime": 100
}}

postcopy_vcpu_blocktime contains a list, where the first item corresponds to
the first vCPU in QEMU.

This patch has a drawback: it combines the states of incoming and
outgoing migration. An ongoing migration state will overwrite the incoming
state. It looks better to separate query-migrate for incoming and
outgoing migration, or to add a parameter indicating the type of migration.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 hmp.c| 15 +
 migration/migration.c| 42 +++
 migration/migration.h|  4 
 migration/postcopy-ram.c | 57 
 migration/trace-events   |  1 +
 qapi/migration.json  | 11 +-
 6 files changed, 125 insertions(+), 5 deletions(-)

diff --git a/hmp.c b/hmp.c
index 0fb2bc7..142f76e 100644
--- a/hmp.c
+++ b/hmp.c
@@ -264,6 +264,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->cpu_throttle_percentage);
 }
 
+if (info->has_postcopy_blocktime) {
+monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+   info->postcopy_blocktime);
+}
+
+if (info->has_postcopy_vcpu_blocktime) {
+Visitor *v;
+char *str;
+v = string_output_visitor_new(false, &str);
+visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+visit_complete(v, &str);
+monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
+g_free(str);
+visit_free(v);
+}
 qapi_free_MigrationInfo(info);
 qapi_free_MigrationCapabilityStatusList(caps);
 }
diff --git a/migration/migration.c b/migration/migration.c
index 4f029e8..e1d3248 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -528,14 +528,15 @@ static void populate_disk_info(MigrationInfo *info)
 }
 }
 
-MigrationInfo *qmp_query_migrate(Error **errp)
+static void fill_source_migration_info(MigrationInfo *info)
 {
-MigrationInfo *info = g_malloc0(sizeof(*info));
 MigrationState *s = migrate_get_current();
 
 switch (s->state) {
 case MIGRATION_STATUS_NONE:
 /* no migration has happened ever */
+/* do not overwrite destination migration status */
+return;
 break;
 case MIGRATION_STATUS_SETUP:
 info->has_status = true;
@@ -584,8 +585,6 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 break;
 }
 info->status = s->state;
-
-return info;
 }
 
 /**
@@ -649,6 +648,41 @@ static bool migrate_caps_check(bool *cap_list,
 return true;
 }
 
+static void fill_destination_migration_info(MigrationInfo *info)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+
+switch (mis->state) {
+case MIGRATION_STATUS_NONE:
+return;
+break;
+case MIGRATION_STATUS_SETUP:
+case MIGRATION_STATUS_CANCELLING:
+case MIGRATION_STATUS_CANCELLED:
+case MIGRATION_STATUS_ACTIVE:
+case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+case MIGRATION_STATUS_FAILED:
+case MIGRATION_STATUS_COLO:
+info->has_status = true;
+break;
+case MIGRATION_STATUS_COMPLETED:
+info->has_status = true;
+fill_destination_postcopy_migration_info(info);
+break;
+}
+info->status = mis->state;
+}
+
+MigrationInfo *qmp_query_migrate(Error **errp)
+{
+MigrationInfo *info = g_malloc0(sizeof(*info));
+
+fill_destination_migration_info(info);
+fill_source_migration_info(info);
+
+return info;
+}
+
 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
   Error **errp)
 {
diff --git a/migration/migration.h b/migration/migration.h
index 770466b..882a59b 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -70,6 +70,10 @@ struct MigrationIncomingState {
 
 MigrationIncomingState *migration_incoming_get_current(void);
 void migration_incoming_state_destroy(void);
+/*
+ * Functions to work with blocktime context
+ */
+void fill_destination_postcopy_migration_info(MigrationInfo *info);
 
 #define TYPE_MIGRATION "migration"
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 9a5133f..5fdbf1e 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -113,6 +113,55 @@ static struct PostcopyBlocktimeContext 
*blocktime_context_new(void)
 re

[Qemu-devel] [PATCH v10 05/10] migration: introduce postcopy-blocktime capability

2017-09-19 Thread Alexey Perevalov
Right now it can be used on the destination side to
enable vCPU blocktime calculation for postcopy live migration.
vCPU blocktime is the time from when the vCPU thread was put into
interruptible sleep till the memory page was copied and the thread woken.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/migration.c | 9 +
 migration/migration.h | 1 +
 qapi/migration.json   | 5 -
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index e820d47..4f029e8 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1380,6 +1380,15 @@ bool migrate_zero_blocks(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
 }
 
+bool migrate_postcopy_blocktime(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
+}
+
 bool migrate_use_compression(void)
 {
 MigrationState *s;
diff --git a/migration/migration.h b/migration/migration.h
index 148c9fa..56bf33c 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -184,6 +184,7 @@ int migrate_compress_level(void);
 int migrate_compress_threads(void);
 int migrate_decompress_threads(void);
 bool migrate_use_events(void);
+bool migrate_postcopy_blocktime(void);
 
 /* Sending on the return path - generic and then for each message type */
 void migrate_send_rp_shut(MigrationIncomingState *mis,
diff --git a/qapi/migration.json b/qapi/migration.json
index ee2b3b8..2e4a15d 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -341,12 +341,15 @@
 # @return-path: If enabled, migration will use the return path even
 #   for precopy. (since 2.10)
 #
+# @postcopy-blocktime: Calculate downtime for postcopy live migration
+# (since 2.11)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
-   'block', 'return-path' ] }
+   'block', 'return-path', 'postcopy-blocktime' ] }
 
 ##
 # @MigrationCapabilityStatus:
-- 
1.9.1




[Qemu-devel] [PATCH v10 02/10] migration: pass MigrationIncomingState* into migration check functions

2017-09-19 Thread Alexey Perevalov
That tiny refactoring is necessary to be able to set
UFFD_FEATURE_THREAD_ID while requesting features, and then
to create the downtime context in case the kernel supports it.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/migration.c|  3 ++-
 migration/postcopy-ram.c | 10 +-
 migration/postcopy-ram.h |  2 +-
 migration/savevm.c   |  2 +-
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 959e8ec..e820d47 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -603,6 +603,7 @@ static bool migrate_caps_check(bool *cap_list,
 {
 MigrationCapabilityStatusList *cap;
 bool old_postcopy_cap;
+MigrationIncomingState *mis = migration_incoming_get_current();
 
 old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
 
@@ -636,7 +637,7 @@ static bool migrate_caps_check(bool *cap_list,
  * special support.
  */
 if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
-!postcopy_ram_supported_by_host()) {
+!postcopy_ram_supported_by_host(mis)) {
 /* postcopy_ram_supported_by_host will have emitted a more
  * detailed message
  */
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7a414eb..4350dd0 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -61,7 +61,7 @@ struct PostcopyDiscardState {
 #include <sys/eventfd.h>
 #include <linux/userfaultfd.h>
 
-static bool ufd_version_check(int ufd)
+static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
 {
 struct uffdio_api api_struct;
 uint64_t ioctl_mask;
@@ -124,7 +124,7 @@ static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
  * normally fine since if the postcopy succeeds it gets turned back on at the
  * end.
  */
-bool postcopy_ram_supported_by_host(void)
+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
 {
 long pagesize = getpagesize();
 int ufd = -1;
@@ -147,7 +147,7 @@ bool postcopy_ram_supported_by_host(void)
 }
 
 /* Version and features check */
-if (!ufd_version_check(ufd)) {
+if (!ufd_version_check(ufd, mis)) {
 goto out;
 }
 
@@ -523,7 +523,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
  * Although the host check already tested the API, we need to
  * do the check again as an ABI handshake on the new fd.
  */
-if (!ufd_version_check(mis->userfault_fd)) {
+if (!ufd_version_check(mis->userfault_fd, mis)) {
 return -1;
 }
 
@@ -677,7 +677,7 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 
 #else
 /* No target OS support, stubs just fail */
-bool postcopy_ram_supported_by_host(void)
+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
 {
 error_report("%s: No OS support", __func__);
 return false;
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 78a3591..77ea0fd 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -14,7 +14,7 @@
 #define QEMU_POSTCOPY_RAM_H
 
 /* Return true if the host supports everything we need to do postcopy-ram */
-bool postcopy_ram_supported_by_host(void);
+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis);
 
 /*
  * Make all of RAM sensitive to accesses to areas that haven't yet been written
diff --git a/migration/savevm.c b/migration/savevm.c
index 7a55023..6ed6d57 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1352,7 +1352,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
 return -1;
 }
 
-if (!postcopy_ram_supported_by_host()) {
+if (!postcopy_ram_supported_by_host(mis)) {
 postcopy_state_set(POSTCOPY_INCOMING_NONE);
 return -1;
 }
-- 
1.9.1




[Qemu-devel] [PATCH v10 01/10] userfault: update kernel header for UFFD_FEATURE_*

2017-09-19 Thread Alexey Perevalov
This commit adds modification for UFFD_FEATURE_SIGBUS and
UFFD_FEATURE_THREAD_ID.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 linux-headers/linux/userfaultfd.h | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/linux-headers/linux/userfaultfd.h 
b/linux-headers/linux/userfaultfd.h
index 9701772..b43cf0d 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -23,7 +23,9 @@
   UFFD_FEATURE_EVENT_REMOVE |  \
   UFFD_FEATURE_EVENT_UNMAP |   \
   UFFD_FEATURE_MISSING_HUGETLBFS | \
-  UFFD_FEATURE_MISSING_SHMEM)
+  UFFD_FEATURE_MISSING_SHMEM | \
+  UFFD_FEATURE_SIGBUS |\
+  UFFD_FEATURE_THREAD_ID)
 #define UFFD_API_IOCTLS\
((__u64)1 << _UFFDIO_REGISTER | \
 (__u64)1 << _UFFDIO_UNREGISTER |   \
@@ -78,6 +80,9 @@ struct uffd_msg {
struct {
__u64   flags;
__u64   address;
+   union {
+   __u32 ptid;
+   } feat;
} pagefault;
 
struct {
@@ -153,6 +158,13 @@ struct uffdio_api {
 * UFFD_FEATURE_MISSING_SHMEM works the same as
 * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
 * (i.e. tmpfs and other shmem based APIs).
+*
+* UFFD_FEATURE_SIGBUS feature means no page-fault
+* (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
+* a SIGBUS signal will be sent to the faulting process.
+*
+* UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
+* be returned, if feature is not requested 0 will be returned.
 */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
 #define UFFD_FEATURE_EVENT_FORK    (1<<1)
@@ -161,6 +173,8 @@ struct uffdio_api {
 #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
 #define UFFD_FEATURE_MISSING_SHMEM (1<<5)
 #define UFFD_FEATURE_EVENT_UNMAP   (1<<6)
+#define UFFD_FEATURE_SIGBUS    (1<<7)
+#define UFFD_FEATURE_THREAD_ID (1<<8)
__u64 features;
 
__u64 ioctls;
-- 
1.9.1
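For illustration only, a minimal user-space consumer of the new feat.ptid
field could look like the sketch below (hypothetical code, not part of QEMU;
it assumes a userfaultfd whose UFFD_API handshake already requested
UFFD_FEATURE_THREAD_ID and a registered region):

#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

/* Read one event from the userfaultfd and report the faulting thread.
 * feat.ptid stays 0 if UFFD_FEATURE_THREAD_ID was not requested. */
static void handle_one_fault(int ufd)
{
    struct uffd_msg msg;
    struct pollfd pfd = { .fd = ufd, .events = POLLIN };

    if (poll(&pfd, 1, -1) > 0 &&
        read(ufd, &msg, sizeof(msg)) == sizeof(msg) &&
        msg.event == UFFD_EVENT_PAGEFAULT) {
        printf("fault at 0x%llx from thread %u\n",
               (unsigned long long)msg.arg.pagefault.address,
               msg.arg.pagefault.feat.ptid);
    }
}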




[Qemu-devel] [PATCH v10 03/10] migration: fix hardcoded function name in error report

2017-09-19 Thread Alexey Perevalov
Reviewed-by: Juan Quintela <quint...@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 4350dd0..a0e74db 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -69,7 +69,7 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
 api_struct.api = UFFD_API;
 api_struct.features = 0;
 if (ioctl(ufd, UFFDIO_API, _struct)) {
-error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
+error_report("%s: UFFDIO_API failed: %s", __func__,
  strerror(errno));
 return false;
 }
-- 
1.9.1




[Qemu-devel] [PATCH v10 07/10] migration: calculate vCPU blocktime on dst side

2017-09-19 Thread Alexey Perevalov
This patch provides blocktime calculation per vCPU,
as a summary and as an overlapped value for all vCPUs.

This approach was suggested by Peter Xu, as an improvement of the
previous approach where QEMU kept a tree with the faulted page address and a cpu bitmask
in it. Now QEMU keeps an array with the faulted page address as value and the vCPU
as index. It helps to find the proper vCPU at UFFD_COPY time. It also keeps
a list of blocktime per vCPU (which can be traced with page_fault_addr).

Blocktime will not be calculated if the postcopy_blocktime field of
MigrationIncomingState wasn't initialized.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 138 ++-
 migration/trace-events   |   5 +-
 2 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index cc78981..9a5133f 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -110,7 +110,6 @@ static struct PostcopyBlocktimeContext *blocktime_context_new(void)
 
 ctx->exit_notifier.notify = migration_exit_cb;
 qemu_add_exit_notifier(&ctx->exit_notifier);
-add_migration_state_change_notifier(&ctx->postcopy_notifier);
 return ctx;
 }
 
@@ -559,6 +558,136 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
 return 0;
 }
 
+static int get_mem_fault_cpu_index(uint32_t pid)
+{
+CPUState *cpu_iter;
+
+CPU_FOREACH(cpu_iter) {
+if (cpu_iter->thread_id == pid) {
+trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid);
+return cpu_iter->cpu_index;
+}
+}
+trace_get_mem_fault_cpu_index(-1, pid);
+return -1;
+}
+
+/*
+ * This function is being called when pagefault occurs. It
+ * tracks down vCPU blocking time.
+ *
+ * @addr: faulted host virtual address
+ * @ptid: faulted process thread id
+ * @rb: ramblock appropriate to addr
+ */
+static void mark_postcopy_blocktime_begin(uint64_t addr, uint32_t ptid,
+  RAMBlock *rb)
+{
+int cpu, already_received;
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int64_t now_ms;
+
+if (!dc || ptid == 0) {
+return;
+}
+cpu = get_mem_fault_cpu_index(ptid);
+if (cpu < 0) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+if (dc->vcpu_addr[cpu] == 0) {
+atomic_inc(&dc->smp_cpus_down);
+}
+
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+atomic_xchg__nocheck(&dc->last_begin, now_ms);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
+
+already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
+if (already_received) {
+atomic_xchg__nocheck(&dc->vcpu_addr[cpu], 0);
+atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], 0);
+atomic_sub(&dc->smp_cpus_down, 1);
+}
+trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
+cpu, already_received);
+}
+
+/*
+ *  This function just provides the calculated blocktime per cpu and traces it.
+ *  Total blocktime is calculated in mark_postcopy_blocktime_end.
+ *
+ *
+ * Assume we have 3 CPUs
+ *
+ *  S1E1   S1   E1
+ * -***xxx***> CPU1
+ *
+ * S2E2
+ * xxx---> CPU2
+ *
+ * S3E3
+ * xxx---> CPU3
+ *
+ * We have sequence S1,S2,E1,S3,S1,E2,E3,E1
+ * S2,E1 - doesn't match the condition, because sequence S1,S2,E1 doesn't include CPU3
+ * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 -
+ *it's a part of total blocktime.
+ * S1 - here is last_begin
+ * Legend of the picture is following:
+ *  * - means blocktime per vCPU
+ *  x - means overlapped blocktime (total blocktime)
+ *
+ * @addr: host virtual address
+ */
+static void mark_postcopy_blocktime_end(uint64_t addr)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+int i, affected_cpu = 0;
+int64_t now_ms;
+bool vcpu_total_blocktime = false;
+
+if (!dc) {
+return;
+}
+
+now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+/* lookup cpu, to clear it,
+ * that algorithm looks straightforward, but it's not
+ * optimal; a more optimal algorithm would keep a tree or hash
+ * where the key is the address and the value is a list of  */
+for (i = 0; i < smp_cpus; i++) {
+uint64_t vcpu_blocktime = 0;
+if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr) {
+continue;
+}
+atomic_xchg__nocheck(&dc->vcpu

[Qemu-devel] [PATCH v10 04/10] migration: split ufd_version_check onto receive/request features part

2017-09-19 Thread Alexey Perevalov
This modification is necessary for userfault fd features which are
required to be requested from userspace.
UFFD_FEATURE_THREAD_ID is one of such "on demand" features, and will
be introduced in the next patch.

QEMU has to use a separate userfault file descriptor, because the
userfault context has internal state: after the first ioctl UFFD_API
it changes its state to UFFD_STATE_RUNNING (in case of success), but the
kernel, while handling ioctl UFFD_API, expects UFFD_STATE_WAIT_API.
So only one ioctl with UFFD_API is possible per ufd.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 94 
 1 file changed, 88 insertions(+), 6 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index a0e74db..bec6c2c 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -61,16 +61,67 @@ struct PostcopyDiscardState {
 #include <sys/eventfd.h>
 #include <linux/userfaultfd.h>
 
-static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
+
+/**
+ * receive_ufd_features: check userfault fd features, to request only supported
+ * features in the future.
+ *
+ * Returns: true on success
+ *
+ * __NR_userfaultfd - should be checked before
 *  @features: out parameter will contain uffdio_api.features provided by kernel
+ *  in case of success
+ */
+static bool receive_ufd_features(uint64_t *features)
 {
-struct uffdio_api api_struct;
-uint64_t ioctl_mask;
+struct uffdio_api api_struct = {0};
+int ufd;
+bool ret = true;
+
+/* if we are here, __NR_userfaultfd should exist */
+ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+if (ufd == -1) {
+error_report("%s: syscall __NR_userfaultfd failed: %s", __func__,
+ strerror(errno));
+return false;
+}
 
+/* ask features */
 api_struct.api = UFFD_API;
 api_struct.features = 0;
 if (ioctl(ufd, UFFDIO_API, &api_struct)) {
 error_report("%s: UFFDIO_API failed: %s", __func__,
  strerror(errno));
+ret = false;
+goto release_ufd;
+}
+
+*features = api_struct.features;
+
+release_ufd:
+close(ufd);
+return ret;
+}
+
+/**
+ * request_ufd_features: this function should be called only once on a newly
+ * opened ufd, subsequent calls will lead to error.
+ *
+ * Returns: true on success
+ *
+ * @ufd: fd obtained from userfaultfd syscall
+ * @features: bit mask see UFFD_API_FEATURES
+ */
+static bool request_ufd_features(int ufd, uint64_t features)
+{
+struct uffdio_api api_struct = {0};
+uint64_t ioctl_mask;
+
+api_struct.api = UFFD_API;
+api_struct.features = features;
+if (ioctl(ufd, UFFDIO_API, &api_struct)) {
+error_report("%s failed: UFFDIO_API failed: %s", __func__,
+ strerror(errno));
 return false;
 }
 
@@ -82,11 +133,42 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
 return false;
 }
 
+return true;
+}
+
+static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
+{
+uint64_t asked_features = 0;
+static uint64_t supported_features;
+
+/*
+ * it's not possible to request UFFD_API twice per fd;
+ * userfault fd features are persistent
+ */
+if (!supported_features) {
+if (!receive_ufd_features(&supported_features)) {
+error_report("%s failed", __func__);
+return false;
+}
+}
+
+/*
+ * request features, even if asked_features is 0, because the
+ * kernel expects UFFD_API before UFFDIO_REGISTER, per
+ * userfault file descriptor
+ */
+if (!request_ufd_features(ufd, asked_features)) {
+error_report("%s failed: features %" PRIu64, __func__,
+ asked_features);
+return false;
+}
+
 if (getpagesize() != ram_pagesize_summary()) {
 bool have_hp = false;
 /* We've got a huge page */
 #ifdef UFFD_FEATURE_MISSING_HUGETLBFS
-have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
+have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS;
 #endif
 if (!have_hp) {
 error_report("Userfault on this host does not support huge pages");
@@ -147,7 +229,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
 }
 
 /* Version and features check */
-if (!ufd_version_check(ufd, mis)) {
+if (!ufd_check_and_apply(ufd, mis)) {
 goto out;
 }
 
@@ -523,7 +605,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
  * Although the host check already tested the API, we need to
  * do the check again as an ABI handshake on the new fd.
  */
-if (!ufd_version_check(mis->userfault_fd, mis)) {
+if (!ufd_check_and_apply(mis->userfault_fd, mis)) {
 return -1;
 }
 
-- 
1.9.1
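The one-UFFD_API-per-ufd contract described in the commit message is easy to
demonstrate in isolation; the standalone sketch below (my own illustration,
assuming a 4.14-era kernel - the exact errno of the second call is a kernel
detail) shows why the features have to be probed on a throwaway descriptor
first:

#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/userfaultfd.h>

int main(void)
{
    struct uffdio_api api = { .api = UFFD_API, .features = 0 };
    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC);

    if (ufd < 0) {
        perror("userfaultfd");
        return 1;
    }
    if (ioctl(ufd, UFFDIO_API, &api)) {   /* first handshake succeeds */
        perror("UFFDIO_API");
        return 1;
    }
    if (ioctl(ufd, UFFDIO_API, &api)) {   /* second handshake is rejected */
        printf("second UFFDIO_API rejected: %s\n", strerror(errno));
    }
    close(ufd);
    return 0;
}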




[Qemu-devel] [PATCH v10 06/10] migration: add postcopy blocktime ctx into MigrationIncomingState

2017-09-19 Thread Alexey Perevalov
This patch adds a request to kernel space for UFFD_FEATURE_THREAD_ID,
in case this feature is provided by the kernel.

PostcopyBlocktimeContext is encapsulated inside postcopy-ram.c,
since it is a postcopy-only feature.
It also defines the PostcopyBlocktimeContext instance's lifetime.
Information from the PostcopyBlocktimeContext instance will be provided
long after postcopy migration ends; the instance of PostcopyBlocktimeContext
will live till QEMU exit, but the parts of it (vcpu_addr,
page_fault_vcpu_time) used only during calculation will be released
when postcopy ends or fails.

To enable postcopy blocktime calculation on the destination, the proper
capability needs to be requested (the patch for documentation is at the
tail of the patch set).

As an example, the following command enables that capability, assuming QEMU
was started with
-chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock
option to control it

[root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \
{\"execute\": \"migrate-set-capabilities\" , \"arguments\":   {
\"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\":
true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock

Or just with HMP
(qemu) migrate_set_capability postcopy-blocktime on

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/migration.h|  8 ++
 migration/postcopy-ram.c | 65 
 2 files changed, 73 insertions(+)

diff --git a/migration/migration.h b/migration/migration.h
index 56bf33c..770466b 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -21,6 +21,8 @@
 #include "qemu/coroutine_int.h"
 #include "hw/qdev.h"
 
+struct PostcopyBlocktimeContext;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
 QEMUFile *from_src_file;
@@ -58,6 +60,12 @@ struct MigrationIncomingState {
 /* The coroutine we should enter (back) after failover */
 Coroutine *migration_incoming_co;
 QemuSemaphore colo_incoming_sem;
+
+/*
+ * PostcopyBlocktimeContext to keep information for postcopy
+ * live migration, to calculate vCPU block time
+ * */
+struct PostcopyBlocktimeContext *blocktime_ctx;
 };
 
 MigrationIncomingState *migration_incoming_get_current(void);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index bec6c2c..cc78981 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -61,6 +61,58 @@ struct PostcopyDiscardState {
 #include <sys/eventfd.h>
 #include <linux/userfaultfd.h>
 
+typedef struct PostcopyBlocktimeContext {
+/* time when page fault initiated per vCPU */
+int64_t *page_fault_vcpu_time;
+/* page address per vCPU */
+uint64_t *vcpu_addr;
+int64_t total_blocktime;
+/* blocktime per vCPU */
+int64_t *vcpu_blocktime;
+/* point in time when last page fault was initiated */
+int64_t last_begin;
+/* number of vCPUs suspended */
+int smp_cpus_down;
+
+/*
+ * Handler for exit event, necessary for
+ * releasing whole blocktime_ctx
+ */
+Notifier exit_notifier;
+/*
+ * Handler for postcopy event, necessary for
+ * releasing unnecessary part of blocktime_ctx
+ */
+Notifier postcopy_notifier;
+} PostcopyBlocktimeContext;
+
+static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx)
+{
+g_free(ctx->page_fault_vcpu_time);
+g_free(ctx->vcpu_addr);
+g_free(ctx->vcpu_blocktime);
+g_free(ctx);
+}
+
+static void migration_exit_cb(Notifier *n, void *data)
+{
+PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext,
+ exit_notifier);
+destroy_blocktime_context(ctx);
+}
+
+static struct PostcopyBlocktimeContext *blocktime_context_new(void)
+{
+PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
+ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+ctx->vcpu_addr = g_new0(uint64_t, smp_cpus);
+ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+
+ctx->exit_notifier.notify = migration_exit_cb;
+qemu_add_exit_notifier(&ctx->exit_notifier);
+add_migration_state_change_notifier(&ctx->postcopy_notifier);
+return ctx;
+}
 
 /**
  * receive_ufd_features: check userfault fd features, to request only supported
@@ -153,6 +205,19 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
 }
 }
 
+#ifdef UFFD_FEATURE_THREAD_ID
+if (migrate_postcopy_blocktime() && mis &&
+UFFD_FEATURE_THREAD_ID & supported_features) {
+/* kernel supports that feature */
+/* don't create blocktime_context if it exists */
+if (!mis->blocktime_ctx) {
+mis->blocktime_ctx = blocktime_context_new();
+}
+
+asked_features |= UFFD_FEATURE_THREAD_ID;
+}
+#endif
+
 /*
  * request features, even if asked_features is 0, due to
  * kernel expects UFFD_API before UFFDIO_REGISTER, per
-- 
1.9.1
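The cleanup mechanism used above - a Notifier embedded in the context and
recovered with container_of() when the exit notifier fires - is a generic
pattern; here is a stripped-down re-statement with plain libc instead of
qemu/notify.h (hypothetical types, purely for readers unfamiliar with the
idiom):

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

typedef struct Notifier Notifier;
struct Notifier {
    void (*notify)(Notifier *n, void *data);
};

typedef struct Ctx {
    long *per_vcpu;          /* stands in for the per-vCPU arrays */
    Notifier exit_notifier;  /* embedded by value, not a pointer */
} Ctx;

static void ctx_exit_cb(Notifier *n, void *data)
{
    /* recover the enclosing context from the embedded member */
    Ctx *ctx = container_of(n, Ctx, exit_notifier);
    free(ctx->per_vcpu);
    free(ctx);
    printf("context released at exit\n");
}

int main(void)
{
    Ctx *ctx = calloc(1, sizeof(*ctx));
    ctx->per_vcpu = calloc(4, sizeof(long));
    ctx->exit_notifier.notify = ctx_exit_cb;
    /* where QEMU would call qemu_add_exit_notifier(&ctx->exit_notifier),
     * the sketch fires the callback directly to show the mechanism */
    ctx->exit_notifier.notify(&ctx->exit_notifier, NULL);
    return 0;
}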




[Qemu-devel] [PATCH v10 08/10] migration: postcopy_blocktime documentation

2017-09-19 Thread Alexey Perevalov
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 docs/devel/migration.txt | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/docs/devel/migration.txt b/docs/devel/migration.txt
index 1b940a8..4b625ca 100644
--- a/docs/devel/migration.txt
+++ b/docs/devel/migration.txt
@@ -402,6 +402,16 @@ will now cause the transition from precopy to postcopy.
 It can be issued immediately after migration is started or any
 time later on.  Issuing it after the end of a migration is harmless.
 
+Blocktime is a postcopy live migration metric, intended to show
+how long the vCPU was in a state of interruptible sleep due to a pagefault.
+This value is calculated on the destination side.
+To enable postcopy blocktime calculation, enter the following command on the
+destination monitor:
+
+migrate_set_capability postcopy-blocktime on
+
+Postcopy blocktime can be retrieved by the query-migrate qmp command.
+
 Note: During the postcopy phase, the bandwidth limits set using
 migrate_set_speed is ignored (to avoid delaying requested pages that
 the destination is waiting for).
-- 
1.9.1




[Qemu-devel] [PATCH v10 00/10] calculate blocktime for postcopy live migration

2017-09-19 Thread Alexey Perevalov
This is 10th version.

The rationale for that idea is the following:
a vCPU can be suspended during postcopy live migration until the faulted
page is copied into the kernel. Downtime on the source side is a value -
the time interval from when the source turns the vCPU off till the
destination starts running the vCPU. That value was proper for precopy
migration, where it really shows the amount of time the vCPU is down. But
not for postcopy migration, because several vCPU threads can suspend after
the vCPU was started. That is important to estimate packet drop for SDN
software.

(V9 -> V10)
- rebase
- patch "update kernel header for UFFD_FEATURE_*" has changed,
and was generated by  scripts/update-linux-headers.sh as David suggested. 


(V8 -> V9)
- rebase
- traces

(V7 -> V8)
- just one comma in
"migration: fix hardcoded function name in error report"
It was really missed, but fixed in a further patch.

(V6 -> V7)
- copied bitmap was placed into RAMBlock, like the other migration
related bitmaps.
- Ordering of mark_postcopy_blocktime_end call and ordering
of checking copied bitmap were changed.
- linewrap style defects
- new patch "postcopy_place_page factoring out"
- postcopy_ram_supported_by_host accepts
MigrationIncomingState in qmp_migrate_set_capabilities
- minor fixes of documentation. 
and the huge description of get_postcopy_total_blocktime was
moved (David's comment).

(V5 -> V6)
- blocktime was added into hmp command. Comment from David.
- bitmap for copied pages was added as well as check in *_begin/_end
functions. Patch uses just introduced RAMBLOCK_FOREACH. Comment from David.
- description of receive_ufd_features/request_ufd_features. Comment from 
David.
- commit message headers/@since references were modified. Comment from Eric.
- also typos in documentation. Comment from Eric.
- style and description of field in MigrationInfo. Comment from Eric.
- ufd_check_and_apply (former ufd_version_check) is called twice,
so my previous patch contained a double allocation of the blocktime context and,
as a result, a memory leak. In this patch series it was fixed.

(V4 -> V5)
- fill_destination_postcopy_migration_info empty stub was missing for non-Linux
builds

(V3 -> V4)
- get rid of Downtime as a name for vCPU waiting time during postcopy 
migration
- PostcopyBlocktimeContext renamed (it was just BlocktimeContext)
- atomic operations are used for dealing with fields of 
PostcopyBlocktimeContext
affected in both threads.
- hardcoded function names in error_report were replaced with %s and __func__
- this patch set includes the postcopy-downtime capability, but it is used on the
destination; coupled with the impossibility of returning the calculated downtime
back to the source to show it in query-migrate, it looks like a big trade-off
- UFFD_API has to be sent whether or not we need to ask the kernel
for a feature, because the kernel expects it in any case (see patch comment)
- postcopy_downtime included into query-migrate output
- also this patch set includes trivial fix
migration: fix hardcoded function name in error report
maybe that is a candidate for qemu-trivial mailing list, but I already
sent "migration: Fixed code style" and it was unclaimed.

(V2 -> V3)
- Downtime calculation approach was changed, thanks to Peter Xu
- Due to the previous point there is no more need to keep a GTree or a bitmap of
cpus.
So the glib changes aren't included in this patch set; they could be resent in
another patch set, if there is a good reason for it.
- No procfs traces in this patchset; if somebody wants them, you can get them
from the patchwork site to track down page fault initiators.
- UFFD_FEATURE_THREAD_ID is requesting only when kernel supports it
- It doesn't send back the downtime, just trace it

This patch set is based on commit
[PATCH v9 0/3] Add bitmap for received pages in postcopy migration

Both patch sets were rebased on commit a9158a5cba955b79d580a252cc58ff44d154e370


Alexey Perevalov (10):
  userfault: update kernel header for UFFD_FEATURE_*
  migration: pass MigrationIncomingState* into migration check functions
  migration: fix hardcoded function name in error report
  migration: split ufd_version_check onto receive/request features part
  migration: introduce postcopy-blocktime capability
  migration: add postcopy blocktime ctx into MigrationIncomingState
  migration: calculate vCPU blocktime on dst side
  migration: postcopy_blocktime documentation
  migration: add blocktime calculation into postcopy-test
  migration: add postcopy total blocktime into query-migrate

 docs/devel/migration.txt  |  10 ++
 hmp.c |  15 ++
 linux-headers/linux/userfaultfd.h |  16 +-
 migration/migration.c |  54 +-
 migration/migration.h |  13 ++
 migration/postcopy-ram.c  | 358 --
 migration/postcopy-ram.h  |   2 +-
 migration/savevm.c|   2 +-
 

[Qemu-devel] [PATCH v10 09/10] migration: add blocktime calculation into postcopy-test

2017-09-19 Thread Alexey Perevalov
This patch just requests blocktime calculation, but doesn't
add any facility to check or show it.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 tests/postcopy-test.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c
index 8142f2a..1db5359 100644
--- a/tests/postcopy-test.c
+++ b/tests/postcopy-test.c
@@ -425,6 +425,17 @@ static void test_migrate(void)
 g_assert(qdict_haskey(rsp, "return"));
 QDECREF(rsp);
 
+#ifdef UFFD_FEATURE_THREAD_ID
+global_qtest = to;
+rsp = qmp("{ 'execute': 'migrate-set-capabilities',"
+  "'arguments': { "
+  "'capabilities': [ {"
+  "'capability': 'postcopy-blocktime',"
+  "'state': true } ] } }");
+g_assert(qdict_haskey(rsp, "return"));
+QDECREF(rsp);
+#endif
+
 /* We want to pick a speed slow enough that the test completes
  * quickly, but that it doesn't complete precopy even on a slow
  * machine, so also set the downtime.
@@ -441,7 +452,6 @@ static void test_migrate(void)
 g_assert(qdict_haskey(rsp, "return"));
 QDECREF(rsp);
 
-
 /* Wait for the first serial output from the source */
 wait_for_serial("src_serial");
 
-- 
1.9.1




Re: [Qemu-devel] [PATCH v9 0/8] calculate blocktime for postcopy live migration

2017-09-18 Thread Alexey Perevalov

On 09/18/2017 02:15 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

This is the 9th version.

The rationale for the idea is the following:
a vCPU can be suspended during postcopy live migration until the faulted
page has been copied in by the kernel. Downtime on the source side is a
value - the time interval from when the source turns the vCPUs off until
the destination starts running them. That was a proper value for precopy
migration: it really shows the amount of time the vCPUs are down. But it is
not for postcopy migration, because several vCPU threads can suspend after
the vCPUs have been started. That is important for estimating packet drop
for SDN software.
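
The calculation can be pictured with a small sketch (illustrative only, not
the patch code): record a timestamp when a vCPU faults on a page that has
not arrived yet, and when that page is copied in, credit the waiting
interval to that vCPU's block time.

#include <stdint.h>

#define MAX_VCPUS 256

static int64_t fault_since_ms[MAX_VCPUS];   /* 0 means "not blocked" */
static uint64_t fault_addr[MAX_VCPUS];      /* page the vCPU waits on */
static int64_t vcpu_blocktime_ms[MAX_VCPUS];

/* Called from the fault-handler thread when vCPU 'cpu' stalls. */
static void mark_blocktime_begin(int cpu, uint64_t addr, int64_t now_ms)
{
    fault_since_ms[cpu] = now_ms;
    fault_addr[cpu] = addr;
}

/* Called after a successful UFFDIO_COPY of 'copied_addr'. */
static void mark_blocktime_end(uint64_t copied_addr, int64_t now_ms)
{
    for (int cpu = 0; cpu < MAX_VCPUS; cpu++) {
        if (fault_since_ms[cpu] && fault_addr[cpu] == copied_addr) {
            vcpu_blocktime_ms[cpu] += now_ms - fault_since_ms[cpu];
            fault_since_ms[cpu] = 0;
        }
    }
}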

Hi Alexey,
   I see that the UFFD_FEATURE_THREAD_ID has landed in kernel v4.14-rc1
over the weekend, so it's probably time to reheat this patchset.

   I think you should be able to generate a first patch by running
   scripts/update-linux-headers.sh

Hi David,
ok, I'll resend it tomorrow.
I also added setting the postcopy-blocktime capability in tests/postcopy-test.c,
but I don't check the result of the QMP command there;
I added it just to enable and exercise the code path. Is that ok for you?
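
For reference, checking the result could look roughly like this, in the same
qmp()/g_assert style the test already uses (a sketch only; it assumes the
'postcopy-blocktime' field that a later patch of the series adds to
query-migrate):

#ifdef UFFD_FEATURE_THREAD_ID
    global_qtest = to;
    rsp = qmp("{ 'execute': 'query-migrate' }");
    g_assert(qdict_haskey(rsp, "return"));
    /* the field only appears once blocktime tracking actually ran */
    g_assert(qdict_haskey(qdict_get_qdict(rsp, "return"),
                          "postcopy-blocktime"));
    QDECREF(rsp);
#endif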


Dave


(V8 -> V9)
 - rebase
 - traces

(V7 -> V8)
 - just one comma in
"migration: fix hardcoded function name in error report"
It was really missing, but it is fixed in a further patch.

(V6 -> V7)
 - the copied bitmap was placed into RAMBlock like the other
migration-related bitmaps.
 - the ordering of the mark_postcopy_blocktime_end call and the ordering
of checking the copied bitmap were changed.
 - line-wrap style defects were fixed
 - new patch "postcopy_place_page factoring out"
 - postcopy_ram_supported_by_host accepts
MigrationIncomingState in qmp_migrate_set_capabilities
 - minor fixes of documentation,
and the huge description of get_postcopy_total_blocktime was
moved. David's comment.

(V5 -> V6)
 - blocktime was added into the hmp command. Comment from David.
 - bitmap for copied pages was added, as well as checks in the *_begin/*_end
functions. The patch uses the just-introduced RAMBLOCK_FOREACH. Comment from David.
 - description of receive_ufd_features/request_ufd_features. Comment from David.
 - commit message headers/@since references were modified. Comment from Eric.
 - also typos in documentation. Comment from Eric.
 - style and description of a field in MigrationInfo. Comment from Eric.
 - ufd_check_and_apply (former ufd_version_check) is called twice,
so my previous patch contained a double allocation of the blocktime context
and, as a result, a memory leak. That is fixed in this patch series.

(V4 -> V5)
 - the fill_destination_postcopy_migration_info empty stub was missing for
the non-Linux build

(V3 -> V4)
 - get rid of Downtime as a name for vCPU waiting time during postcopy
migration
 - PostcopyBlocktimeContext renamed (it was just BlocktimeContext)
 - atomic operations are used for dealing with the fields of
PostcopyBlocktimeContext affected in both threads.
 - hardcoded function names in error_report were replaced with %s and __line__
 - this patch set includes the postcopy-downtime capability, but it is used
on the destination; since there is no way to return the calculated downtime
back to the source to show it in query-migrate, this looks like a big
trade-off
 - UFFD_API has to be sent whether or not we need to ask the kernel for a
feature, because the kernel expects it in any case (see patch comment)
 - postcopy_downtime was included into the query-migrate output
 - also this patch set includes the trivial fix
migration: fix hardcoded function name in error report
maybe that is a candidate for the qemu-trivial mailing list, but I already
sent "migration: Fixed code style" and it went unclaimed.

(V2 -> V3)
 - The downtime calculation approach was changed, thanks to Peter Xu
 - Due to the previous point there is no more need to keep a GTree or a
bitmap of cpus, so the glib changes aren't included in this patch set; they
could be resent in another patch set if there is a good reason for it.
 - No procfs traces in this patchset; if somebody wants them, they can be
taken from the patchwork site to track down page fault initiators.
 - UFFD_FEATURE_THREAD_ID is requested only when the kernel supports it
 - It doesn't send back the downtime, just traces it

This patch set is based on commit
[PATCH v3 0/3] Add bitmap for received pages in postcopy migration


Alexey Perevalov (8):
   userfault: add pid into uffd_msg & update UFFD_FEATURE_*
   migration: pass MigrationIncomingState* into migration check functions
   migration: fix hardcoded function name in error report
   migration: split ufd_version_check onto receive/request features part
   migration: introduce postcopy-blocktime capability
   migration: add postcopy blocktime ctx into MigrationIncomingState
   migration: calculate vCPU blocktime on dst side
   migration: postcopy_blocktime documentation

  docs/devel/migration.txt  |  10 ++
  linux-headers/linux/userfaul

Re: [Qemu-devel] [RFC v2 00/32] postcopy+vhost-user/shared ram

2017-09-01 Thread Alexey Perevalov

Hello David,

You wrote in the previous version:


We've had a postcopy migrate work now, with a few hacks we're still
cleaning up, both on vhost-user-bridge and dpdk; so I'll get this
updated and reposted.


I want to know more about the DPDK work. Do you know whether somebody is
assigned to that task?



On 08/24/2017 10:26 PM, Dr. David Alan Gilbert (git) wrote:

From: "Dr. David Alan Gilbert" <dgilb...@redhat.com>

Hi,
   This is a RFC/WIP series that enables postcopy migration
with shared memory to a vhost-user process.
It's based off current-head + Alexey's bitmap series

It's tested with vhost-user-bridge and a dpdk (modified by Maxime
that will get posted separately) - both very lightly.

It's still got a few very rough edges, but it successfully migrates
with both normal and huge pages (2M).

The major difference over v1 is that there's a set of code
that merges vhost regions together on the qemu side so that
we get a single hugepage region on the PC spanning the 640k
hole (the hole hopefully isn't accessed by the client,
but the client used to align around it anyway).

It's also got a lot of cleanups from the comments from v1
but there's still a few things that need work.
In particular, there's still the hack around qemu waiting
for the set_mem_table to come back; I also worry what would
happen if a set-mem-table was triggered during a migrate;
I suspect it would break badly.

One thing that turned out not to be a problem was the madvises for hugepages;
because we register userfault directly after mmap'ing the
region in the client, we have no pages mapped and hence
the madvise's/fallocate's are fortunately not compulsory.
Still, I'd like a way to do it; it would feel safer.

A copy of this code, based off the current 2.10.0-rc4
together with Alexey's bitmap code is available here:
 https://github.com/dagrh/qemu/tree/vhost-wipv2

Dave

Dr. David Alan Gilbert (32):
   vhu: vu_queue_started
   vhub: Only process received packets on started queues
   migrate: Update ram_block_discard_range for shared
   qemu_ram_block_host_offset
   migration/ram: ramblock_recv_bitmap_test_byte_offset
   postcopy: use UFFDIO_ZEROPAGE only when available
   postcopy: Add notifier chain
   postcopy: Add vhost-user flag for postcopy and check it
   vhost-user: Add 'VHOST_USER_POSTCOPY_ADVISE' message
   vhub: Support sending fds back to qemu
   vhub: Open userfaultfd
   postcopy: Allow registering of fd handler
   vhost+postcopy: Register shared ufd with postcopy
   vhost+postcopy: Transmit 'listen' to client
   vhost+postcopy: Register new regions with the ufd
   vhost+postcopy: Send address back to qemu
   vhost+postcopy: Stash RAMBlock and offset
   vhost+postcopy: Send requests to source for shared pages
   vhost+postcopy: Resolve client address
   postcopy: wake shared
   postcopy: postcopy_notify_shared_wake
   vhost+postcopy: Add vhost waker
   vhost+postcopy: Call wakeups
   vub+postcopy: madvises
   vhost+postcopy: Lock around set_mem_table
   vhost: Add VHOST_USER_POSTCOPY_END message
   vhost+postcopy: Wire up POSTCOPY_END notify
   postcopy: Allow shared memory
   vhost-user: Claim support for postcopy
   vhost: Merge neighbouring hugepage regions where appropriate
   vhost: Don't break merged regions on small remove/non-adds
   postcopy shared docs

  contrib/libvhost-user/libvhost-user.c | 226 -
  contrib/libvhost-user/libvhost-user.h |  22 ++-
  docs/devel/migration.txt  |  39 
  docs/interop/vhost-user.txt   |  39 
  exec.c|  60 --
  hw/virtio/trace-events|  27 +++
  hw/virtio/vhost-user.c| 326 +-
  hw/virtio/vhost.c | 121 +++-
  include/exec/cpu-common.h |   4 +
  migration/migration.c |   3 +
  migration/migration.h |   4 +
  migration/postcopy-ram.c  | 359 +++---
  migration/postcopy-ram.h  |  69 +++
  migration/ram.c   |   5 +
  migration/ram.h   |   1 +
  migration/savevm.c|  13 ++
  migration/trace-events|   6 +
  tests/vhost-user-bridge.c |   1 +
  trace-events  |   3 +
  vl.c  |   2 +
  20 files changed, 1241 insertions(+), 89 deletions(-)



--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [RFC 23/29] vub+postcopy: madvises

2017-08-09 Thread Alexey Perevalov

On 08/08/2017 08:06 PM, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

On 06/28/2017 10:00 PM, Dr. David Alan Gilbert (git) wrote:

From: "Dr. David Alan Gilbert" <dgilb...@redhat.com>

Clear the area and turn off THP.

Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
---
   contrib/libvhost-user/libvhost-user.c | 32 ++--
   1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/contrib/libvhost-user/libvhost-user.c 
b/contrib/libvhost-user/libvhost-user.c
index 0658b6e847..ceddeac74f 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -451,11 +451,39 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
   }
   if (dev->postcopy_listening) {
+int ret;
   /* We should already have an open ufd need to mark each memory
* range as ufd.
- * Note: Do we need any madvises? Well it's not been accessed
- * yet, still probably need no THP to be safe, discard to be safe?
*/
+
+/* Discard any mapping we have here; note I can't use MADV_REMOVE
+ * or fallocate to make the hole since I don't want to lose
+ * data that's already arrived in the shared process.
+ * TODO: How to do hugepage
+ */

Hi David, frankly speaking, I'm stuck with my solution, and I also have other
issues, but here I can suggest a solution for hugepages. I think we could
transmit a received-pages bitmap in VHOST_USER_SET_MEM_TABLE
(VhostUserMemoryRegion), but that would raise a compatibility issue;
or we could introduce a special message type for it and send it before
VHOST_USER_SET_MEM_TABLE.
It would then be possible to do fallocate on the basis of the received
bitmap and just skip already copied pages.
If you wish, I could send patches, rebased on yours, for doing it.
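
A hypothetical sketch of such a special message's wire layout (the names are
illustrative only; this is not part of the vhost-user protocol):

#include <stdint.h>

/* Sent once per memory region, before VHOST_USER_SET_MEM_TABLE, so the
 * client could skip fallocate for pages QEMU has already placed. */
typedef struct VhostUserRecvBitmap {
    uint64_t guest_phys_addr;   /* region this bitmap describes */
    uint64_t nr_bits;           /* one bit per target-size page */
    /* followed by (nr_bits + 7) / 8 bytes of bitmap payload */
} VhostUserRecvBitmap;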

What we found works is that actually we don't need to do a discard -
since we've only just done the mmap of the arena, nothing will be
occupying it on the shared client, so we don't need to discard.

Looks like yes; I checked on a kernel from Andrea's git,
and there is no longer an EEXIST error in the case when the client
doesn't fallocate.



We've had a postcopy migrate work now, with a few hacks we're still
cleaning up, both on vhost-user-bridge and dpdk; so I'll get this
updated and reposted.

In your patch series the vring is disabled on VHOST_USER_GET_VRING_BASE.
It's called when the vhost-user server wants to stop the vring.
QEMU enables the vring as soon as the virtual machine is started, so I
didn't see an explicit vring disable for the migrating vring.
So the migrating vring is protected just by uffd_register, isn't it? And the
PMD thread (any vhost-user thread which accesses the migrating vring) will
wait for the page copy in this case, right?



Dave


+ret = madvise((void *)dev_region->mmap_addr,
+  dev_region->size + dev_region->mmap_offset,
+  MADV_DONTNEED);
+if (ret) {
+fprintf(stderr,
+"%s: Failed to madvise(DONTNEED) region %d: %s\n",
+__func__, i, strerror(errno));
+}
+/* Turn off transparent hugepages so we don't lose wakeups
+ * in neighbouring pages.
+ * TODO: Turn this back on later.
+ */
+ret = madvise((void *)dev_region->mmap_addr,
+  dev_region->size + dev_region->mmap_offset,
+  MADV_NOHUGEPAGE);
+if (ret) {
+/* Note: This can happen legally on kernels that are configured
+ * without madvise'able hugepages
+ */
+fprintf(stderr,
+"%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n",
+__func__, i, strerror(errno));
+}
   struct uffdio_register reg_struct;
   /* Note: We might need to go back to using mmap_addr and
* len + mmap_offset for * huge pages, but then we do hope not to


--
Best regards,
Alexey Perevalov

--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK





--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [RFC 23/29] vub+postcopy: madvises

2017-08-06 Thread Alexey Perevalov

On 06/28/2017 10:00 PM, Dr. David Alan Gilbert (git) wrote:

From: "Dr. David Alan Gilbert" <dgilb...@redhat.com>

Clear the area and turn off THP.

Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
---
  contrib/libvhost-user/libvhost-user.c | 32 ++--
  1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/contrib/libvhost-user/libvhost-user.c 
b/contrib/libvhost-user/libvhost-user.c
index 0658b6e847..ceddeac74f 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -451,11 +451,39 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
  }
  
  if (dev->postcopy_listening) {

+int ret;
  /* We should already have an open ufd need to mark each memory
   * range as ufd.
- * Note: Do we need any madvises? Well it's not been accessed
- * yet, still probably need no THP to be safe, discard to be safe?
   */
+
+/* Discard any mapping we have here; note I can't use MADV_REMOVE
+ * or fallocate to make the hole since I don't want to lose
+ * data that's already arrived in the shared process.
+ * TODO: How to do hugepage
+ */
Hi David, frankly speaking, I'm stuck with my solution, and I also have
other issues, but here I can suggest a solution for hugepages. I think we
could transmit a received-pages bitmap in VHOST_USER_SET_MEM_TABLE
(VhostUserMemoryRegion), but that would raise a compatibility issue;
or we could introduce a special message type for it and send it before
VHOST_USER_SET_MEM_TABLE.
It would then be possible to do fallocate on the basis of the received
bitmap and just skip already copied pages.

If you wish, I could send patches, rebased on yours, for doing it.


+ret = madvise((void *)dev_region->mmap_addr,
+  dev_region->size + dev_region->mmap_offset,
+  MADV_DONTNEED);
+if (ret) {
+fprintf(stderr,
+"%s: Failed to madvise(DONTNEED) region %d: %s\n",
+__func__, i, strerror(errno));
+}
+/* Turn off transparent hugepages so we don't lose wakeups
+ * in neighbouring pages.
+ * TODO: Turn this back on later.
+ */
+ret = madvise((void *)dev_region->mmap_addr,
+  dev_region->size + dev_region->mmap_offset,
+  MADV_NOHUGEPAGE);
+if (ret) {
+/* Note: This can happen legally on kernels that are configured
+ * without madvise'able hugepages
+ */
+fprintf(stderr,
+"%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n",
+__func__, i, strerror(errno));
+}
  struct uffdio_register reg_struct;
  /* Note: We might need to go back to using mmap_addr and
   * len + mmap_offset for * huge pages, but then we do hope not to



--
Best regards,
Alexey Perevalov



[Qemu-devel] [PATCH v9 3/3] migration: add bitmap for received page

2017-08-01 Thread Alexey Perevalov
This patch adds the ability to track already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and for recovery after
postcopy migration failure.

Also it's necessary for solving the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an EEXIST
error (the struct page still exists after remapping).

The bitmap is placed into RAMBlock like the other postcopy/precopy
related bitmaps.

Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 include/exec/ram_addr.h  | 10 ++
 migration/postcopy-ram.c | 17 -
 migration/ram.c  | 45 +
 migration/ram.h  |  6 ++
 4 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index c04f4f6..bb902bb 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -47,6 +47,8 @@ struct RAMBlock {
  * of the postcopy phase
  */
 unsigned long *unsentmap;
+/* bitmap of already received pages in postcopy */
+unsigned long *receivedmap;
 };
 
 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
@@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
 return (char *)block->host + offset;
 }
 
+static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
+RAMBlock *rb)
+{
+uint64_t host_addr_offset =
+(uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
+return host_addr_offset >> TARGET_PAGE_BITS;
+}
+
 long qemu_getrampagesize(void);
 unsigned long last_ram_page(void);
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index be497bb..7a414eb 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -560,22 +560,28 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 }
 
 static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
 {
+int ret;
 if (from_addr) {
 struct uffdio_copy copy_struct;
 copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
 copy_struct.src = (uint64_t)(uintptr_t)from_addr;
 copy_struct.len = pagesize;
 copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
 } else {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
 zero_struct.range.len = pagesize;
 zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set_range(rb, host_addr,
+   pagesize / qemu_target_page_size());
 }
+return ret;
 }
 
 /*
@@ -592,7 +598,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -614,7 +620,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(),
+rb)) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
diff --git a/migration/ram.c b/migration/ram.c
index 9cc1b17..d14b8bb 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -44,6 +44,7 @@
 #include "qemu/error-report.h"
 #include "trace.h"
 #include "exec/ram_addr.h"
+#include "exec/target_page.h"
 #include "qemu/rcu_queue.h"
 #include "migration/colo.h"
 
@@ -147,6 +148,40 @@ out:
 return ret;
 }
 
+sta

[Qemu-devel] [PATCH v9 0/3] Add bitmap for received pages in postcopy migration

2017-08-01 Thread Alexey Perevalov
This is the 9th version of
[PATCH v1 0/2] Add bitmap for copied pages in postcopy migration;
the cover message is from there.

This is a separate patch set, it derived from
https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html

There are several possible use cases:
1. solving the issue with postcopy live migration and shared memory:
OVS-VSWITCH requires information about copied pages in order to fallocate
newly allocated pages (see the sketch after this list).
2. calculating vCPU blocktime;
for more details see
https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html
3. recovery after a failure in the middle of postcopy migration
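
A sketch of how use case 1 could consult such a bitmap (hypothetical names,
not OVS or QEMU code): punch holes only for pages that have not been
received, avoiding the UFFDIO_COPY EEXIST problem described above.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

extern bool recv_bitmap_test(uint64_t pfn);   /* hypothetical helper */

static void discard_unreceived(int shm_fd, uint64_t first_pfn,
                               uint64_t npages, size_t page_size)
{
    for (uint64_t pfn = first_pfn; pfn < first_pfn + npages; pfn++) {
        if (recv_bitmap_test(pfn)) {
            continue;   /* page already copied in: keep it mapped */
        }
        /* unmap only pages that have not arrived yet */
        (void)fallocate(shm_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                        (off_t)(pfn * page_size), (off_t)page_size);
    }
}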


V8 -> V9
- the patch "migration: fix incorrect postcopy recved_bitmap" from the
"[RFC 00/29] Migration: postcopy failure recovery" patch set was squashed
into the latest patch of this patchset; getpagesize was replaced with
qemu_target_page_size, as David suggested.
- for the sake of API uniformity the semantics of all functions were
changed: now RAMBlock *rb is the first argument, as in the bitmap API.
- also the TARGET_PAGE_BITS define was replaced with qemu_target_page_bits in
all other places of this patchset, for uniformity and maintenance.

V7 -> V8
- removed the unnecessary renaming and moving of the block variable to
ram_load's function scope
- ramblock_recv_map_init became a static function

V6 -> V7
- rebased on
[PATCH v7 0/5] Create setup/cleanup methods for migration incoming side
- the lifetime of the received map was changed
(ram_load_setup/ram_load_cleanup)

V5 -> V6
- call ramblock_recv_map_init from migration_fd_process_incoming (Peter
suggested), but finalization is still in ram_load_cleanup, as Juan suggested.

V4 -> V5
- removed ramblock_recv_bitmap_clear_range in favor of bitmap_clear (comment
from David)
- single invocation place for ramblock_recv_bitmap_set (comment from Peter)
- minor changes like removing the comment from qemu_ufd_copy_ioctl and the
local variable from ramblock_recv_map_init (comment from Peter)

V3 -> V4
- clear_bit instead of ramblock_recv_bitmap_clear in
ramblock_recv_bitmap_clear_range; it reduces the number of operations
(comment from Juan)
- for postcopy, ramblock_recv_bitmap_set is called after the page was
copied, and only in case of success (comment from David)
- indentation fixes (comment from Juan)

V2 -> V3
- the ramblock_recv_map_init call is placed into migration_incoming_get_current;
it looks like a general place for both the precopy and postcopy cases.
- releasing of the received bitmap memory is placed into ram_load_cleanup;
unfortunately, it is called only in the precopy case.
- precopy case and discard ram block case
- function renaming, and other minor cleanups

V1 -> V2
- change in terminology: s/copied/received/g
- granularity became TARGET_PAGE_SIZE, not the actual page size of the
ramblock
- movecopiedmap & get_copiedmap_size were removed, until the patch set where
they will be necessary
- releasing the memory of receivedmap was added into ram_load_cleanup
- new patch "migration: introduce qemu_ufd_copy_ioctl helper"

Patchset is based on:
commit 6d60e295ef020759a03b90724d0342012c189ba2
"Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into 
staging"

Alexey Perevalov (3):
  migration: postcopy_place_page factoring out
  migration: introduce qemu_ufd_copy_ioctl helper
  migration: add bitmap for received page

 include/exec/ram_addr.h  | 10 +
 migration/postcopy-ram.c | 54 +++-
 migration/postcopy-ram.h |  4 ++--
 migration/ram.c  | 49 +--
 migration/ram.h  |  6 ++
 5 files changed, 100 insertions(+), 23 deletions(-)

-- 
1.9.1




[Qemu-devel] [PATCH v9 1/3] migration: postcopy_place_page factoring out

2017-08-01 Thread Alexey Perevalov
We need to mark copied pages as close as possible to the place where they
are tracked. That will be necessary in a further patch.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Peter Xu <pet...@redhat.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 13 +++--
 migration/postcopy-ram.h |  4 ++--
 migration/ram.c  |  4 ++--
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7e21e6f..996e64d 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -564,9 +564,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
  * returns 0 on success
  */
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize)
+RAMBlock *rb)
 {
 struct uffdio_copy copy_struct;
+size_t pagesize = qemu_ram_pagesize(rb);
 
 copy_struct.dst = (uint64_t)(uintptr_t)host;
 copy_struct.src = (uint64_t)(uintptr_t)from;
@@ -595,11 +596,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
  * returns 0 on success
  */
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize)
+ RAMBlock *rb)
 {
 trace_postcopy_place_page_zero(host);
 
-if (pagesize == getpagesize()) {
+if (qemu_ram_pagesize(rb) == getpagesize()) {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host;
 zero_struct.range.len = getpagesize();
@@ -629,7 +630,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
 }
 return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
-   pagesize);
+   rb);
 }
 
 return 0;
@@ -692,14 +693,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 }
 
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize)
+RAMBlock *rb)
 {
 assert(0);
 return -1;
 }
 
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
-size_t pagesize)
+RAMBlock *rb)
 {
 assert(0);
 return -1;
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 52d51e8..78a3591 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms,
  * returns 0 on success
  */
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize);
+RAMBlock *rb);
 
 /*
  * Place a zero page at (host) atomically
  * returns 0 on success
  */
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize);
+ RAMBlock *rb);
 
 /* The current postcopy state is read/set by postcopy_state_get/set
  * which update it atomically.
diff --git a/migration/ram.c b/migration/ram.c
index 1b08296..9cc1b17 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2470,10 +2470,10 @@ static int ram_load_postcopy(QEMUFile *f)
 
 if (all_zero) {
 ret = postcopy_place_page_zero(mis, place_dest,
-   block->page_size);
+   block);
 } else {
 ret = postcopy_place_page(mis, place_dest,
-  place_source, block->page_size);
+  place_source, block);
 }
 }
 if (!ret) {
-- 
1.9.1




[Qemu-devel] [PATCH v9 2/3] migration: introduce qemu_ufd_copy_ioctl helper

2017-08-01 Thread Alexey Perevalov
Just for placing auxiliary operations inside the helper;
auxiliary operations like tracking received pages and
notifying about the copy operation will come in further patches.

Reviewed-by: Juan Quintela <quint...@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 996e64d..be497bb 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -559,6 +559,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 return 0;
 }
 
+static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
+void *from_addr, uint64_t pagesize)
+{
+if (from_addr) {
+struct uffdio_copy copy_struct;
+copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
+copy_struct.src = (uint64_t)(uintptr_t)from_addr;
+copy_struct.len = pagesize;
+copy_struct.mode = 0;
+return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+} else {
+struct uffdio_zeropage zero_struct;
+zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
+zero_struct.range.len = pagesize;
+zero_struct.mode = 0;
+return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+}
+
 /*
  * Place a host page (from) at (host) atomically
  * returns 0 on success
@@ -566,20 +585,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 RAMBlock *rb)
 {
-struct uffdio_copy copy_struct;
 size_t pagesize = qemu_ram_pagesize(rb);
 
-copy_struct.dst = (uint64_t)(uintptr_t)host;
-copy_struct.src = (uint64_t)(uintptr_t)from;
-copy_struct.len = pagesize;
-copy_struct.mode = 0;
-
 /* copy also acks to the kernel waking the stalled thread up
  * TODO: We can inhibit that ack and only do it if it was requested
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -601,12 +614,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-struct uffdio_zeropage zero_struct;
-zero_struct.range.start = (uint64_t)(uintptr_t)host;
-zero_struct.range.len = getpagesize();
-zero_struct.mode = 0;
-
-if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
-- 
1.9.1




Re: [Qemu-devel] [RFC 01/29] migration: fix incorrect postcopy recved_bitmap

2017-08-01 Thread Alexey Perevalov

On 08/01/2017 09:02 AM, Peter Xu wrote:

On Tue, Aug 01, 2017 at 08:48:18AM +0300, Alexey Perevalov wrote:

On 08/01/2017 05:11 AM, Peter Xu wrote:

On Mon, Jul 31, 2017 at 05:34:14PM +0100, Dr. David Alan Gilbert wrote:

* Peter Xu (pet...@redhat.com) wrote:

The bitmap setup during postcopy is incorrect when the pages are huge
pages. Fix it.

Signed-off-by: Peter Xu <pet...@redhat.com>
---
  migration/postcopy-ram.c | 2 +-
  migration/ram.c  | 8 
  migration/ram.h  | 2 ++
  3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 276ce12..952b73a 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -578,7 +578,7 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
  ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
  }
  if (!ret) {
-ramblock_recv_bitmap_set(host_addr, rb);
+ramblock_recv_bitmap_set_range(rb, host_addr, pagesize / getpagesize());

isn't that   pagesize / qemu_target_page_size() ?

Other than that it looks OK.

Yes, I should have fixed this before.

I guess Alexey will handle this change (along with the copied bitmap
series)?  Anyway, I'll fix it as well in my series, until Alexey posts
the new version that I can rebase to.  Thanks,


I'll squash it, and I'll resend it today.
Do you agree to add

Signed-off-by: Peter Xu <pet...@redhat.com>

to my patch?

Firstly, if you are squashing the patch, fixing the issue that Dave
has pointed out, please feel free to add my R-b on the patch.

Of course I'll take into account David's suggestion.


I don't know whether it would be suitable to add my S-o-b here - since
most of the patch content is written by you, not me. But I'm totally
fine if you want to include that (btw, thanks for the offer :).

So either one R-b or S-o-b is okay to me.  Thanks,



--
Best regards,
Alexey Perevalov
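
A minimal sketch of the corrected whole-range marking discussed in this
thread, assuming the receivedmap and bitmap helpers used by this patch set
(with David's pagesize / qemu_target_page_size() suggestion at the call
site):

/* Mark every small (target-size) page covered by the placed host page. */
void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set(rb->receivedmap,
               ramblock_recv_bitmap_offset(host_addr, rb), nr);
}

/* Call site, after a successful UFFDIO_COPY / UFFDIO_ZEROPAGE:
 *     ramblock_recv_bitmap_set_range(rb, host_addr,
 *                                    pagesize / qemu_target_page_size());
 */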



Re: [Qemu-devel] [RFC 01/29] migration: fix incorrect postcopy recved_bitmap

2017-07-31 Thread Alexey Perevalov

On 08/01/2017 05:11 AM, Peter Xu wrote:

On Mon, Jul 31, 2017 at 05:34:14PM +0100, Dr. David Alan Gilbert wrote:

* Peter Xu (pet...@redhat.com) wrote:

The bitmap setup during postcopy is incorrect when the pages are huge
pages. Fix it.

Signed-off-by: Peter Xu <pet...@redhat.com>
---
  migration/postcopy-ram.c | 2 +-
  migration/ram.c  | 8 
  migration/ram.h  | 2 ++
  3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 276ce12..952b73a 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -578,7 +578,7 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
  ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
  }
  if (!ret) {
-ramblock_recv_bitmap_set(host_addr, rb);
+ramblock_recv_bitmap_set_range(rb, host_addr, pagesize / getpagesize());

isn't that   pagesize / qemu_target_page_size() ?

Other than that it looks OK.

Yes, I should have fixed this before.

I guess Alexey will handle this change (along with the copied bitmap
series)?  Anyway, I'll fix it as well in my series, until Alexey posts
the new version that I can rebase to.  Thanks,


I'll squash it, and I'll resend it today.
Do you agree to add

Signed-off-by: Peter Xu <pet...@redhat.com>

to my patch?


--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page

2017-07-28 Thread Alexey Perevalov

On 07/28/2017 10:06 AM, Alexey Perevalov wrote:

On 07/28/2017 09:57 AM, Peter Xu wrote:

On Fri, Jul 28, 2017 at 09:43:28AM +0300, Alexey Perevalov wrote:

On 07/28/2017 07:27 AM, Peter Xu wrote:

On Thu, Jul 27, 2017 at 10:27:41AM +0300, Alexey Perevalov wrote:

On 07/27/2017 05:35 AM, Peter Xu wrote:

On Wed, Jul 26, 2017 at 06:24:11PM +0300, Alexey Perevalov wrote:

On 07/26/2017 11:43 AM, Peter Xu wrote:

On Wed, Jul 26, 2017 at 11:07:17AM +0300, Alexey Perevalov wrote:

On 07/26/2017 04:49 AM, Peter Xu wrote:
On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote:

This patch adds the ability to track already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and maybe for restore after
a postcopy migration failure.
Also it's necessary for solving the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an EEXIST
error (the struct page still exists after remapping).

The bitmap is placed into RAMBlock like the other postcopy/precopy
related bitmaps.

Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---

[...]

  static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
  {
+int ret;
  if (from_addr) {
  struct uffdio_copy copy_struct;
  copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
  copy_struct.src = (uint64_t)(uintptr_t)from_addr;
  copy_struct.len = pagesize;
  copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
  } else {
  struct uffdio_zeropage zero_struct;
  zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
  zero_struct.range.len = pagesize;
  zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set(host_addr, rb);

Wait...

Now we are using 4k-page/bit bitmap, do we need to take care of the
huge pages here?  Looks like we are only setting the first bit of it
if it is a huge page?

First version was per ramblock page size, IOW bitmap was smaller in
case of hugepages.

Yes, but this is not the first version any more. :)

This patch is using:

   bitmap_new(rb->max_length >> TARGET_PAGE_BITS);

to allocate bitmap, so it is using small pages always for bitmap,
right? (I should not really say "4k" pages, here I think the size is
host page size, which is the thing returned from getpagesize()).

You mentioned that TARGET_PAGE_SIZE is reasonable for the precopy case,
in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page".
I thought of TARGET_PAGE_SIZE as the transmission unit; it is used in
precopy even in the hugepage case.
But that is not quite logical: a page marked as dirty should be sent as a
whole page.

Sorry if I misunderstood, but I didn't see anything wrong - we are
sending pages in small pages, but when postcopy is there, we do
UFFDIO_COPY in huge page, so everything is fine?

I think yes, we chose TARGET_PAGE_SIZE because of wider
use case ranges.
So... are you going to post another version? IIUC we just need to use
a bitmap_set() to replace the ramblock_recv_bitmap_set(), while set
the size with "pagesize / TARGET_PAGE_SIZE"?
 From my point of view TARGET_PAGE_SIZE/TARGET_PAGE_BITS is platform
specific, and it is used in ram_load to copy into the buffer, so it is
preferable for the bitmap size; and I'm not going to replace the
ramblock_recv_bitmap_set helper - it calculates the offset.



(I think I was wrong when saying getpagesize() above: the small page
  should be target page size, while the huge page should be the host's)

I think we should forget about huge page case in "received bitmap"
concept, maybe in "uffd_copied bitmap" it was reasonable ;)

Again, I am not sure I got the whole idea of the reply...

However, I do think when we UFFDIO_COPY a huge page, then we should do
bitmap_set() on the received bitmap for the whole range that the huge
page covers.

for what purpose?

We chose to use small-paged bitmap since in precopy we need to have
such a granularity (in precopy, we can copy a small page even that
small page is on a host huge page).

Since we decided to use the small-paged bitmap, we need to make sure
it follows how it was defined: one bit defines whether the
corresponding small page is received. IMHO not following that is hacky
and error-prone.


IMHO, the

Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page

2017-07-28 Thread Alexey Perevalov

On 07/28/2017 09:57 AM, Peter Xu wrote:

On Fri, Jul 28, 2017 at 09:43:28AM +0300, Alexey Perevalov wrote:

On 07/28/2017 07:27 AM, Peter Xu wrote:

On Thu, Jul 27, 2017 at 10:27:41AM +0300, Alexey Perevalov wrote:

On 07/27/2017 05:35 AM, Peter Xu wrote:

On Wed, Jul 26, 2017 at 06:24:11PM +0300, Alexey Perevalov wrote:

On 07/26/2017 11:43 AM, Peter Xu wrote:

On Wed, Jul 26, 2017 at 11:07:17AM +0300, Alexey Perevalov wrote:

On 07/26/2017 04:49 AM, Peter Xu wrote:

On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote:

This patch adds the ability to track already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and maybe for restore after
a postcopy migration failure.
Also it's necessary for solving the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an EEXIST
error (the struct page still exists after remapping).

The bitmap is placed into RAMBlock like the other postcopy/precopy
related bitmaps.

Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---

[...]


  static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
  {
+int ret;
  if (from_addr) {
  struct uffdio_copy copy_struct;
  copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
  copy_struct.src = (uint64_t)(uintptr_t)from_addr;
  copy_struct.len = pagesize;
  copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
  } else {
  struct uffdio_zeropage zero_struct;
  zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
  zero_struct.range.len = pagesize;
  zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set(host_addr, rb);

Wait...

Now we are using 4k-page/bit bitmap, do we need to take care of the
huge pages here?  Looks like we are only setting the first bit of it
if it is a huge page?

First version was per ramblock page size, IOW bitmap was smaller in
case of hugepages.

Yes, but this is not the first version any more. :)

This patch is using:

   bitmap_new(rb->max_length >> TARGET_PAGE_BITS);

to allocate bitmap, so it is using small pages always for bitmap,
right? (I should not really say "4k" pages, here I think the size is
host page size, which is the thing returned from getpagesize()).


You mentioned that TARGET_PAGE_SIZE is reasonable for precopy case,
in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page"
I thought of TARGET_PAGE_SIZE as the transmission unit; it is used in
precopy even in the hugepage case.
But that is not quite logical: a page marked as dirty should be sent as a
whole page.

Sorry if I misunderstood, but I didn't see anything wrong - we are
sending pages in small pages, but when postcopy is there, we do
UFFDIO_COPY in huge page, so everything is fine?

I think yes, we chose TARGET_PAGE_SIZE because of wider
use case ranges.

So... are you going to post another version? IIUC we just need to use
a bitmap_set() to replace the ramblock_recv_bitmap_set(), while set
the size with "pagesize / TARGET_PAGE_SIZE"?

 From my point of view TARGET_PAGE_SIZE/TARGET_PAGE_BITS is platform
specific, and it is used in ram_load to copy into the buffer, so it is
preferable for the bitmap size; and I'm not going to replace the
ramblock_recv_bitmap_set helper - it calculates the offset.


(I think I was wrong when saying getpagesize() above: the small page
  should be target page size, while the huge page should be the host's)

I think we should forget about huge page case in "received bitmap"
concept, maybe in "uffd_copied bitmap" it was reasonable ;)

Again, I am not sure I got the whole idea of the reply...

However, I do think when we UFFDIO_COPY a huge page, then we should do
bitmap_set() on the received bitmap for the whole range that the huge
page covers.

for what purpose?

We chose to use small-paged bitmap since in precopy we need to have
such a granularity (in precopy, we can copy a small page even that
small page is on a host huge page).

Since we decided to use the small-paged bitmap, we need to make sure
it follows how it was defined: one bit defines whether the
corresponding small page is received. IMHO not following that is hacky
and error-prone.


IMHO, the bitmap is defined as "one bit per small page", and the small
page size is T

Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page

2017-07-28 Thread Alexey Perevalov

On 07/28/2017 07:27 AM, Peter Xu wrote:

On Thu, Jul 27, 2017 at 10:27:41AM +0300, Alexey Perevalov wrote:

On 07/27/2017 05:35 AM, Peter Xu wrote:

On Wed, Jul 26, 2017 at 06:24:11PM +0300, Alexey Perevalov wrote:

On 07/26/2017 11:43 AM, Peter Xu wrote:

On Wed, Jul 26, 2017 at 11:07:17AM +0300, Alexey Perevalov wrote:

On 07/26/2017 04:49 AM, Peter Xu wrote:

On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote:

This patch adds the ability to track already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and maybe for restore after
a postcopy migration failure.
Also it's necessary for solving the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an EEXIST
error (the struct page still exists after remapping).

The bitmap is placed into RAMBlock like the other postcopy/precopy
related bitmaps.

Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---

[...]


  static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
  {
+int ret;
  if (from_addr) {
  struct uffdio_copy copy_struct;
  copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
  copy_struct.src = (uint64_t)(uintptr_t)from_addr;
  copy_struct.len = pagesize;
  copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
  } else {
  struct uffdio_zeropage zero_struct;
  zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
  zero_struct.range.len = pagesize;
  zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set(host_addr, rb);

Wait...

Now we are using 4k-page/bit bitmap, do we need to take care of the
huge pages here?  Looks like we are only setting the first bit of it
if it is a huge page?

First version was per ramblock page size, IOW bitmap was smaller in
case of hugepages.

Yes, but this is not the first version any more. :)

This patch is using:

   bitmap_new(rb->max_length >> TARGET_PAGE_BITS);

to allocate bitmap, so it is using small pages always for bitmap,
right? (I should not really say "4k" pages, here I think the size is
host page size, which is the thing returned from getpagesize()).


You mentioned that TARGET_PAGE_SIZE is reasonable for precopy case,
in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page"
I thought of TARGET_PAGE_SIZE as the transmission unit; it is used in
precopy even in the hugepage case.
But that is not quite logical: a page marked as dirty should be sent as a
whole page.

Sorry if I misunderstood, but I didn't see anything wrong - we are
sending pages in small pages, but when postcopy is there, we do
UFFDIO_COPY in huge page, so everything is fine?

I think yes, we chose TARGET_PAGE_SIZE because of wider
use case ranges.

So... are you going to post another version? IIUC we just need to use
a bitmap_set() to replace the ramblock_recv_bitmap_set(), while set
the size with "pagesize / TARGET_PAGE_SIZE"?

 From my point of view TARGET_PAGE_SIZE/TARGET_PAGE_BITS is platform
specific, and it is used in ram_load to copy into the buffer, so it is
preferable for the bitmap size; and I'm not going to replace the
ramblock_recv_bitmap_set helper - it calculates the offset.


(I think I was wrong when saying getpagesize() above: the small page
  should be target page size, while the huge page should be the host's)

I think we should forget about huge page case in "received bitmap"
concept, maybe in "uffd_copied bitmap" it was reasonable ;)

Again, I am not sure I got the whole idea of the reply...

However, I do think when we UFFDIO_COPY a huge page, then we should do
bitmap_set() on the received bitmap for the whole range that the huge
page covers.

for what purpose?



IMHO, the bitmap is defined as "one bit per small page", and the small
page size is TARGET_PAGE_SIZE. We cannot just assume that "as long as
the first bit of the huge page is set, all the small pages in the huge
page are set".

At the moment of copying, all small pages of the huge page
should have been received. Yes, it's an assumption, but I couldn't predict
a side effect; maybe it will be necessary in postcopy failure handling,
while copying pages back, but I'm not sure right now.
To know that, one needs to start implementing it, or at least
investigate it deeply.

Thanks,



--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page

2017-07-27 Thread Alexey Perevalov

On 07/27/2017 05:35 AM, Peter Xu wrote:

On Wed, Jul 26, 2017 at 06:24:11PM +0300, Alexey Perevalov wrote:

On 07/26/2017 11:43 AM, Peter Xu wrote:

On Wed, Jul 26, 2017 at 11:07:17AM +0300, Alexey Perevalov wrote:

On 07/26/2017 04:49 AM, Peter Xu wrote:

On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote:

This patch adds the ability to track already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and maybe for restore after
a postcopy migration failure.
Also it's necessary for solving the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an EEXIST
error (the struct page still exists after remapping).

The bitmap is placed into RAMBlock like the other postcopy/precopy
related bitmaps.

Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---

[...]


  static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
  {
+int ret;
  if (from_addr) {
  struct uffdio_copy copy_struct;
  copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
  copy_struct.src = (uint64_t)(uintptr_t)from_addr;
  copy_struct.len = pagesize;
  copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
  } else {
  struct uffdio_zeropage zero_struct;
  zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
  zero_struct.range.len = pagesize;
  zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set(host_addr, rb);

Wait...

Now we are using 4k-page/bit bitmap, do we need to take care of the
huge pages here?  Looks like we are only setting the first bit of it
if it is a huge page?

First version was per ramblock page size, IOW bitmap was smaller in
case of hugepages.

Yes, but this is not the first version any more. :)

This patch is using:

   bitmap_new(rb->max_length >> TARGET_PAGE_BITS);

to allocate bitmap, so it is using small pages always for bitmap,
right? (I should not really say "4k" pages, here I think the size is
host page size, which is the thing returned from getpagesize()).


You mentioned that TARGET_PAGE_SIZE is reasonable for precopy case,
in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page"
I thought of TARGET_PAGE_SIZE as the transmission unit; it is used in
precopy even in the hugepage case.
But that is not quite logical: a page marked as dirty should be sent as a
whole page.

Sorry if I misunderstood, but I didn't see anything wrong - we are
sending pages in small pages, but when postcopy is there, we do
UFFDIO_COPY in huge page, so everything is fine?

I think yes, we chose TARGET_PAGE_SIZE because of wider
use case ranges.

So... are you going to post another version? IIUC we just need to use
a bitmap_set() to replace the ramblock_recv_bitmap_set(), while set
the size with "pagesize / TARGET_PAGE_SIZE"?
From my point of view TARGET_PAGE_SIZE/TARGET_PAGE_BITS is platform
specific, and it is used in ram_load to copy into the buffer, so it is
preferable for the bitmap size; and I'm not going to replace the
ramblock_recv_bitmap_set helper - it calculates the offset.



(I think I was wrong when saying getpagesize() above: the small page
  should be target page size, while the huge page should be the host's)

I think we should forget about huge page case in "received bitmap"
concept, maybe in "uffd_copied bitmap" it was reasonable ;)





--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page

2017-07-26 Thread Alexey Perevalov

On 07/26/2017 11:43 AM, Peter Xu wrote:

On Wed, Jul 26, 2017 at 11:07:17AM +0300, Alexey Perevalov wrote:

On 07/26/2017 04:49 AM, Peter Xu wrote:

On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote:

This patch adds the ability to track already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and maybe for restore after
a postcopy migration failure.
Also it's necessary for solving the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an EEXIST
error (the struct page still exists after remapping).

The bitmap is placed into RAMBlock like the other postcopy/precopy
related bitmaps.

Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---

[...]


  static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
  {
+int ret;
  if (from_addr) {
  struct uffdio_copy copy_struct;
  copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
  copy_struct.src = (uint64_t)(uintptr_t)from_addr;
  copy_struct.len = pagesize;
  copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
  } else {
  struct uffdio_zeropage zero_struct;
  zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
  zero_struct.range.len = pagesize;
  zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set(host_addr, rb);

Wait...

Now we are using 4k-page/bit bitmap, do we need to take care of the
huge pages here?  Looks like we are only setting the first bit of it
if it is a huge page?

First version was per ramblock page size, IOW bitmap was smaller in
case of hugepages.

Yes, but this is not the first version any more. :)

This patch is using:

   bitmap_new(rb->max_length >> TARGET_PAGE_BITS);

to allocate bitmap, so it is using small pages always for bitmap,
right? (I should not really say "4k" pages, here I think the size is
host page size, which is the thing returned from getpagesize()).



You mentioned that TARGET_PAGE_SIZE is reasonable for precopy case,
in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page"
I thought of TARGET_PAGE_SIZE as the transmission unit; it is used in
precopy even in the hugepage case.
But that is not quite logical: a page marked as dirty should be sent as a
whole page.

Sorry if I misunderstood, but I didn't see anything wrong - we are
sending pages in small pages, but when postcopy is there, we do
UFFDIO_COPY in huge page, so everything is fine?

I think yes, we chose TARGET_PAGE_SIZE because of wider
use case ranges.


--
Best regards,
Alexey Perevalov



Re: [Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page

2017-07-26 Thread Alexey Perevalov

On 07/26/2017 04:49 AM, Peter Xu wrote:

On Thu, Jul 20, 2017 at 09:52:34AM +0300, Alexey Perevalov wrote:

This patch adds the ability to track already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and maybe for restore after
a postcopy migration failure.
Also it's necessary for solving the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an EEXIST
error (the struct page still exists after remapping).

The bitmap is placed into RAMBlock like the other postcopy/precopy
related bitmaps.

Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---

[...]


  static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
  {
+int ret;
  if (from_addr) {
  struct uffdio_copy copy_struct;
  copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
  copy_struct.src = (uint64_t)(uintptr_t)from_addr;
  copy_struct.len = pagesize;
  copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
  } else {
  struct uffdio_zeropage zero_struct;
  zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
  zero_struct.range.len = pagesize;
  zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set(host_addr, rb);

Wait...

Now that we are using a 4k-page/bit bitmap, do we need to take care of
huge pages here?  It looks like we are only setting the first bit of it
if it is a huge page?

The first version was per-ramblock page size; IOW, the bitmap was smaller
in case of hugepages.


You mentioned that TARGET_PAGE_SIZE is reasonable for the precopy case,
in "Re: [Qemu-devel] [PATCH v1 2/2] migration: add bitmap for copied page".
I thought of TARGET_PAGE_SIZE as the transmission unit, which is used in
precopy even in the hugepage case.
But logically that is not so: a page marked as dirty should be sent as a
whole page.






--
Best regards,
Alexey Perevalov



[Qemu-devel] [PATCH v8 3/3] migration: add bitmap for received page

2017-07-20 Thread Alexey Perevalov
This patch adds the ability to track down already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and maybe for restore after
a postcopy migration failure.
It is also necessary to solve the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an
EEXIST error (the struct page exists after the remap).

The bitmap is placed into RAMBlock alongside the other
postcopy/precopy related bitmaps.

Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 include/exec/ram_addr.h  | 10 ++
 migration/postcopy-ram.c | 16 +++-
 migration/ram.c  | 36 
 migration/ram.h  |  5 +
 4 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index c04f4f6..bb902bb 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -47,6 +47,8 @@ struct RAMBlock {
  * of the postcopy phase
  */
 unsigned long *unsentmap;
+/* bitmap of already received pages in postcopy */
+unsigned long *receivedmap;
 };
 
 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
@@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
 return (char *)block->host + offset;
 }
 
+static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
+RAMBlock *rb)
+{
+uint64_t host_addr_offset =
+(uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
+return host_addr_offset >> TARGET_PAGE_BITS;
+}
+
 long qemu_getrampagesize(void);
 unsigned long last_ram_page(void);
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index be497bb..276ce12 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -560,22 +560,27 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 }
 
 static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
 {
+int ret;
 if (from_addr) {
 struct uffdio_copy copy_struct;
 copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
 copy_struct.src = (uint64_t)(uintptr_t)from_addr;
 copy_struct.len = pagesize;
 copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
 } else {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
 zero_struct.range.len = pagesize;
 zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set(host_addr, rb);
 }
+return ret;
 }
 
 /*
@@ -592,7 +597,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -614,7 +619,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(),
+rb)) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
diff --git a/migration/ram.c b/migration/ram.c
index 9cc1b17..107ee9d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -147,6 +147,32 @@ out:
 return ret;
 }
 
+static void ramblock_recv_map_init(void)
+{
+RAMBlock *rb;
+
+RAMBLOCK_FOREACH(rb) {
+assert(!rb->receivedmap);
+rb->receivedmap = bitmap_new(rb->max_length >> TARGET_PAGE_BITS);
+}
+}
+
+int ramblock_recv_bitmap_test(void *host_addr, RAMBlock *rb)
+{
+return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
+rb->receivedmap);
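
As a worked example of the ramblock_recv_bitmap_offset() arithmetic above,
with illustrative addresses and TARGET_PAGE_BITS assumed to be 12 (4 KiB
target pages), standalone and compilable outside QEMU:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uintptr_t rb_host   = 0x7f0000000000ul; /* start of RAMBlock mapping */
    uintptr_t host_addr = 0x7f0000004000ul; /* a received page's address */
    uint64_t host_addr_offset = host_addr - rb_host;      /* 0x4000 */

    /* Bit index into receivedmap, one bit per target page. */
    printf("bit index = %llu\n",
           (unsigned long long)(host_addr_offset >> 12)); /* prints 4 */
    return 0;
}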

[Qemu-devel] [PATCH v8 0/3] Add bitmap for received pages in postcopy migration

2017-07-20 Thread Alexey Perevalov
This is the 8th version of
[PATCH v1 0/2] Add bitmap for copied pages in postcopy migration;
the cover message from there follows.

This is a separate patch set; it derived from
https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html

There are several possible use cases:
1. Solving the issue with postcopy live migration and shared memory:
OVS-VSWITCH requires information about copied pages, to fallocate
only the newly allocated pages (see the sketch after this list).
2. Calculating vCPU blocktime;
for more details see
https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html
3. Recovery after a failure in the middle of postcopy migration
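
A minimal sketch of use case 1, assuming a consumer that shares the
RAMBlock's backing file and can consult the received bitmap; the function,
its parameters and the fd are illustrative, not code from this series
(the FALLOC_FL_* flags need _GNU_SOURCE):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>

/* Punch holes only where nothing has been received yet. */
static void drop_unreceived(int fd, RAMBlock *rb, char *rb_host,
                            uint64_t rb_size, uint64_t page_size)
{
    uint64_t off;

    for (off = 0; off < rb_size; off += page_size) {
        if (!ramblock_recv_bitmap_test(rb_host + off, rb)) {
            fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      off, page_size);
        }
    }
}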


V7 -> V8
- removed unnecessary renaming and moving of the block variable to
ram_load's function scope
- ramblock_recv_map_init became a static function

V6 -> V7
- rebased on
[PATCH v7 0/5] Create setup/cleanup methods for migration incoming side
- lifetime of the received map was changed
(ram_load_setup/ram_load_cleanup)

V5 -> V6
- call ramblock_recv_map_init from migration_fd_process_incoming (Peter
suggested), but finalization is still in ram_load_cleanup, as Juan suggested.

V4 -> V5
- remove ramblock_recv_bitmap_clear_range in favor of bitmap_clear (comment
from David)
- single invocation place for ramblock_recv_bitmap_set (comment from Peter)
- minor changes like removing a comment from qemu_ufd_copy_ioctl and a local
variable from
ramblock_recv_map_init (comment from Peter)

V3 -> V4
- clear_bit instead of ramblock_recv_bitmap_clear in
ramblock_recv_bitmap_clear_range,
it reduced the number of operations (comment from Juan)
- for postcopy, ramblock_recv_bitmap_set is called after the page was copied,
only in case of success (comment from David)
- indentation fixes (comment from Juan)

V2 -> V3
- the ramblock_recv_map_init call is placed into migration_incoming_get_current;
it looks like the general place for both the precopy and postcopy cases.
- received bitmap memory releasing is placed into ram_load_cleanup;
unfortunately, it is called only in case of precopy.
- precopy case and discard ram block case
- function renaming, and other minor cleanups

V1 -> V2
- change in terminology s/copied/received/g
- granularity became TARGET_PAGE_SIZE, rather than the actual page size of the
ramblock
- movecopiedmap & get_copiedmap_size were removed, until the patch set where
they will be necessary
- releasing memory of receivedmap was added into ram_load_cleanup
- new patch "migration: introduce qemu_ufd_copy_ioctl helper"

Patchset is based on:
commit 6d60e295ef020759a03b90724d0342012c189ba2
"Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into 
staging"

Alexey Perevalov (3):
  migration: postcopy_place_page factoring out
  migration: introduce qemu_ufd_copy_ioctl helper
  migration: add bitmap for received page

 include/exec/ram_addr.h  | 10 +
 migration/postcopy-ram.c | 53 +++-
 migration/postcopy-ram.h |  4 ++--
 migration/ram.c  | 40 ++--
 migration/ram.h  |  5 +
 5 files changed, 89 insertions(+), 23 deletions(-)

-- 
1.9.1




[Qemu-devel] [PATCH v8 2/3] migration: introduce qemu_ufd_copy_ioctl helper

2017-07-20 Thread Alexey Perevalov
Just for placing auxiliary operations inside a helper;
auxiliary operations like tracking received pages and
notifying about the copy operation come in further patches.

Reviewed-by: Juan Quintela <quint...@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 996e64d..be497bb 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -559,6 +559,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 return 0;
 }
 
+static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
+void *from_addr, uint64_t pagesize)
+{
+if (from_addr) {
+struct uffdio_copy copy_struct;
+copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
+copy_struct.src = (uint64_t)(uintptr_t)from_addr;
+copy_struct.len = pagesize;
+copy_struct.mode = 0;
+return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+} else {
+struct uffdio_zeropage zero_struct;
+zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
+zero_struct.range.len = pagesize;
+zero_struct.mode = 0;
+return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+}
+
 /*
  * Place a host page (from) at (host) atomically
  * returns 0 on success
@@ -566,20 +585,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 RAMBlock *rb)
 {
-struct uffdio_copy copy_struct;
 size_t pagesize = qemu_ram_pagesize(rb);
 
-copy_struct.dst = (uint64_t)(uintptr_t)host;
-copy_struct.src = (uint64_t)(uintptr_t)from;
-copy_struct.len = pagesize;
-copy_struct.mode = 0;
-
 /* copy also acks to the kernel waking the stalled thread up
  * TODO: We can inhibit that ack and only do it if it was requested
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -601,12 +614,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-struct uffdio_zeropage zero_struct;
-zero_struct.range.start = (uint64_t)(uintptr_t)host;
-zero_struct.range.len = getpagesize();
-zero_struct.mode = 0;
-
-if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
-- 
1.9.1
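
The TODO comment in postcopy_place_page() above refers to UFFDIO_COPY's
implicit wakeup of the faulting thread. A hedged sketch of the "inhibited
ack" alternative it mentions, using the kernel's UFFDIO_COPY_MODE_DONTWAKE
flag plus an explicit UFFDIO_WAKE; this is only an illustration of the
userfaultfd API, not part of this series:

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <stdint.h>

/* Copy a page without waking the faulting thread... */
static int place_page_no_wake(int uffd, void *dst, void *src, uint64_t len)
{
    struct uffdio_copy copy = {
        .dst  = (uint64_t)(uintptr_t)dst,
        .src  = (uint64_t)(uintptr_t)src,
        .len  = len,
        .mode = UFFDIO_COPY_MODE_DONTWAKE, /* inhibit the implicit wakeup */
    };
    struct uffdio_range range = {
        .start = (uint64_t)(uintptr_t)dst,
        .len   = len,
    };

    if (ioctl(uffd, UFFDIO_COPY, &copy)) {
        return -1;
    }
    /* ...then wake explicitly, once our page state has been updated. */
    return ioctl(uffd, UFFDIO_WAKE, &range);
}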




[Qemu-devel] [PATCH v8 1/3] migration: postcopy_place_page factoring out

2017-07-20 Thread Alexey Perevalov
Need to mark copied pages as close as possible to the place where they
are tracked down. That will be necessary in a further patch.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Peter Xu <pet...@redhat.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 13 +++--
 migration/postcopy-ram.h |  4 ++--
 migration/ram.c  |  4 ++--
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7e21e6f..996e64d 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -564,9 +564,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
  * returns 0 on success
  */
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize)
+RAMBlock *rb)
 {
 struct uffdio_copy copy_struct;
+size_t pagesize = qemu_ram_pagesize(rb);
 
 copy_struct.dst = (uint64_t)(uintptr_t)host;
 copy_struct.src = (uint64_t)(uintptr_t)from;
@@ -595,11 +596,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
  * returns 0 on success
  */
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize)
+ RAMBlock *rb)
 {
 trace_postcopy_place_page_zero(host);
 
-if (pagesize == getpagesize()) {
+if (qemu_ram_pagesize(rb) == getpagesize()) {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host;
 zero_struct.range.len = getpagesize();
@@ -629,7 +630,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
 }
 return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
-   pagesize);
+   rb);
 }
 
 return 0;
@@ -692,14 +693,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 }
 
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize)
+RAMBlock *rb)
 {
 assert(0);
 return -1;
 }
 
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
-size_t pagesize)
+RAMBlock *rb)
 {
 assert(0);
 return -1;
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 52d51e8..78a3591 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms,
  * returns 0 on success
  */
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize);
+RAMBlock *rb);
 
 /*
  * Place a zero page at (host) atomically
  * returns 0 on success
  */
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize);
+ RAMBlock *rb);
 
 /* The current postcopy state is read/set by postcopy_state_get/set
  * which update it atomically.
diff --git a/migration/ram.c b/migration/ram.c
index 1b08296..9cc1b17 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2470,10 +2470,10 @@ static int ram_load_postcopy(QEMUFile *f)
 
 if (all_zero) {
 ret = postcopy_place_page_zero(mis, place_dest,
-   block->page_size);
+   block);
 } else {
 ret = postcopy_place_page(mis, place_dest,
-  place_source, block->page_size);
+  place_source, block);
 }
 }
 if (!ret) {
-- 
1.9.1




[Qemu-devel] [PATCH v7 3/3] migration: add bitmap for received page

2017-06-30 Thread Alexey Perevalov
This patch adds the ability to track down already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and maybe for restore after
a postcopy migration failure.
It is also necessary to solve the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an
EEXIST error (the struct page exists after the remap).

The bitmap is placed into RAMBlock alongside the other
postcopy/precopy related bitmaps.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 include/exec/ram_addr.h  | 10 ++
 migration/postcopy-ram.c | 16 +++-
 migration/ram.c  | 43 ---
 migration/ram.h  |  6 ++
 4 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 73d1bea..af5bf26 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -47,6 +47,8 @@ struct RAMBlock {
  * of the postcopy phase
  */
 unsigned long *unsentmap;
+/* bitmap of already received pages in postcopy */
+unsigned long *receivedmap;
 };
 
 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
@@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
 return (char *)block->host + offset;
 }
 
+static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
+RAMBlock *rb)
+{
+uint64_t host_addr_offset =
+(uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
+return host_addr_offset >> TARGET_PAGE_BITS;
+}
+
 long qemu_getrampagesize(void);
 unsigned long last_ram_page(void);
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index be497bb..276ce12 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -560,22 +560,27 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 }
 
 static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
 {
+int ret;
 if (from_addr) {
 struct uffdio_copy copy_struct;
 copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
 copy_struct.src = (uint64_t)(uintptr_t)from_addr;
 copy_struct.len = pagesize;
 copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
 } else {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
 zero_struct.range.len = pagesize;
 zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set(host_addr, rb);
 }
+return ret;
 }
 
 /*
@@ -592,7 +597,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -614,7 +619,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(),
+rb)) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
diff --git a/migration/ram.c b/migration/ram.c
index 9cc1b17..dfbb36b 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -147,6 +147,32 @@ out:
 return ret;
 }
 
+void ramblock_recv_map_init(void)
+{
+RAMBlock *rb;
+
+RAMBLOCK_FOREACH(rb) {
+assert(!rb->receivedmap);
+rb->receivedmap = bitmap_new(rb->max_length >> TARGET_PAGE_BITS);
+}
+}
+
+int ramblock_recv_bitmap_test(void *host_addr, RAMBlock *rb)
+{
+return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
+rb->receivedmap);
+}
+
+void ramblock_r

[Qemu-devel] [PATCH v7 1/3] migration: postcopy_place_page factoring out

2017-06-30 Thread Alexey Perevalov
Need to mark copied pages as close as possible to the place where they
are tracked down. That will be necessary in a further patch.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Peter Xu <pet...@redhat.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 13 +++--
 migration/postcopy-ram.h |  4 ++--
 migration/ram.c  |  4 ++--
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7e21e6f..996e64d 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -564,9 +564,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
  * returns 0 on success
  */
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize)
+RAMBlock *rb)
 {
 struct uffdio_copy copy_struct;
+size_t pagesize = qemu_ram_pagesize(rb);
 
 copy_struct.dst = (uint64_t)(uintptr_t)host;
 copy_struct.src = (uint64_t)(uintptr_t)from;
@@ -595,11 +596,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
  * returns 0 on success
  */
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize)
+ RAMBlock *rb)
 {
 trace_postcopy_place_page_zero(host);
 
-if (pagesize == getpagesize()) {
+if (qemu_ram_pagesize(rb) == getpagesize()) {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host;
 zero_struct.range.len = getpagesize();
@@ -629,7 +630,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
 }
 return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
-   pagesize);
+   rb);
 }
 
 return 0;
@@ -692,14 +693,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 }
 
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize)
+RAMBlock *rb)
 {
 assert(0);
 return -1;
 }
 
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
-size_t pagesize)
+RAMBlock *rb)
 {
 assert(0);
 return -1;
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 52d51e8..78a3591 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms,
  * returns 0 on success
  */
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize);
+RAMBlock *rb);
 
 /*
  * Place a zero page at (host) atomically
  * returns 0 on success
  */
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize);
+ RAMBlock *rb);
 
 /* The current postcopy state is read/set by postcopy_state_get/set
  * which update it atomically.
diff --git a/migration/ram.c b/migration/ram.c
index 1b08296..9cc1b17 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2470,10 +2470,10 @@ static int ram_load_postcopy(QEMUFile *f)
 
 if (all_zero) {
 ret = postcopy_place_page_zero(mis, place_dest,
-   block->page_size);
+   block);
 } else {
 ret = postcopy_place_page(mis, place_dest,
-  place_source, block->page_size);
+  place_source, block);
 }
 }
 if (!ret) {
-- 
1.8.3.1




[Qemu-devel] [PATCH v7 2/3] migration: introduce qemu_ufd_copy_ioctl helper

2017-06-30 Thread Alexey Perevalov
Just for placing auxiliary operations inside a helper;
auxiliary operations like tracking received pages and
notifying about the copy operation come in further patches.

Reviewed-by: Juan Quintela <quint...@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 996e64d..be497bb 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -559,6 +559,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 return 0;
 }
 
+static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
+void *from_addr, uint64_t pagesize)
+{
+if (from_addr) {
+struct uffdio_copy copy_struct;
+copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
+copy_struct.src = (uint64_t)(uintptr_t)from_addr;
+copy_struct.len = pagesize;
+copy_struct.mode = 0;
+return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+} else {
+struct uffdio_zeropage zero_struct;
+zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
+zero_struct.range.len = pagesize;
+zero_struct.mode = 0;
+return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+}
+
 /*
  * Place a host page (from) at (host) atomically
  * returns 0 on success
@@ -566,20 +585,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState 
*mis)
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 RAMBlock *rb)
 {
-struct uffdio_copy copy_struct;
 size_t pagesize = qemu_ram_pagesize(rb);
 
-copy_struct.dst = (uint64_t)(uintptr_t)host;
-copy_struct.src = (uint64_t)(uintptr_t)from;
-copy_struct.len = pagesize;
-copy_struct.mode = 0;
-
 /* copy also acks to the kernel waking the stalled thread up
  * TODO: We can inhibit that ack and only do it if it was requested
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -601,12 +614,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, 
void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-struct uffdio_zeropage zero_struct;
-zero_struct.range.start = (uint64_t)(uintptr_t)host;
-zero_struct.range.len = getpagesize();
-zero_struct.mode = 0;
-
-if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
-- 
1.8.3.1




[Qemu-devel] [PATCH v6 3/3] migration: add bitmap for received page

2017-06-28 Thread Alexey Perevalov
This patch adds the ability to track down already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and maybe for restore after
a postcopy migration failure.
It is also necessary to solve the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an
EEXIST error (the struct page exists after the remap).

The bitmap is placed into RAMBlock alongside the other
postcopy/precopy related bitmaps.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 include/exec/ram_addr.h  | 10 ++
 migration/migration.c|  1 +
 migration/postcopy-ram.c | 16 +++-
 migration/ram.c  | 42 +++---
 migration/ram.h  |  6 ++
 5 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 140efa8..4170656 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -47,6 +47,8 @@ struct RAMBlock {
  * of the postcopy phase
  */
 unsigned long *unsentmap;
+/* bitmap of already received pages in postcopy */
+unsigned long *receivedmap;
 };
 
 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
@@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
 return (char *)block->host + offset;
 }
 
+static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
+RAMBlock *rb)
+{
+uint64_t host_addr_offset =
+(uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
+return host_addr_offset >> TARGET_PAGE_BITS;
+}
+
 long qemu_getrampagesize(void);
 unsigned long last_ram_page(void);
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
diff --git a/migration/migration.c b/migration/migration.c
index 71e38bc..63ded8c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -363,6 +363,7 @@ void migration_fd_process_incoming(QEMUFile *f)
 Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, f);
 
 qemu_file_set_blocking(f, false);
+ramblock_recv_map_init();
 qemu_coroutine_enter(co);
 }
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 293db97..f980d93 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -562,22 +562,27 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 }
 
 static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
 {
+int ret;
 if (from_addr) {
 struct uffdio_copy copy_struct;
 copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
 copy_struct.src = (uint64_t)(uintptr_t)from_addr;
 copy_struct.len = pagesize;
 copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
 } else {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
 zero_struct.range.len = pagesize;
 zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set(host_addr, rb);
 }
+return ret;
 }
 
 /*
@@ -594,7 +599,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -616,7 +621,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(),
+rb)) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
diff --git a/migration/ram.c b/migration/ram.c
index f50479d..95962a0 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -151,6 +151,32 @@ out:
 return ret;
 }
 
+void

[Qemu-devel] [PATCH v6 1/3] migration: postcopy_place_page factoring out

2017-06-28 Thread Alexey Perevalov
Need to mark copied pages as close as possible to the place where they
are tracked down. That will be necessary in a further patch.

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Peter Xu <pet...@redhat.com>
Reviewed-by: Juan Quintela <quint...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 13 +++--
 migration/postcopy-ram.h |  4 ++--
 migration/ram.c  |  4 ++--
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index c8c4500..dae41b5 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -566,9 +566,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
  * returns 0 on success
  */
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize)
+RAMBlock *rb)
 {
 struct uffdio_copy copy_struct;
+size_t pagesize = qemu_ram_pagesize(rb);
 
 copy_struct.dst = (uint64_t)(uintptr_t)host;
 copy_struct.src = (uint64_t)(uintptr_t)from;
@@ -597,11 +598,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
  * returns 0 on success
  */
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize)
+ RAMBlock *rb)
 {
 trace_postcopy_place_page_zero(host);
 
-if (pagesize == getpagesize()) {
+if (qemu_ram_pagesize(rb) == getpagesize()) {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host;
 zero_struct.range.len = getpagesize();
@@ -631,7 +632,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
 }
 return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
-   pagesize);
+   rb);
 }
 
 return 0;
@@ -694,14 +695,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 }
 
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize)
+RAMBlock *rb)
 {
 assert(0);
 return -1;
 }
 
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
-size_t pagesize)
+RAMBlock *rb)
 {
 assert(0);
 return -1;
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 52d51e8..78a3591 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms,
  * returns 0 on success
  */
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
-size_t pagesize);
+RAMBlock *rb);
 
 /*
  * Place a zero page at (host) atomically
  * returns 0 on success
  */
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize);
+ RAMBlock *rb);
 
 /* The current postcopy state is read/set by postcopy_state_get/set
  * which update it atomically.
diff --git a/migration/ram.c b/migration/ram.c
index 8dbdfdb..f50479d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2465,10 +2465,10 @@ static int ram_load_postcopy(QEMUFile *f)
 
 if (all_zero) {
 ret = postcopy_place_page_zero(mis, place_dest,
-   block->page_size);
+   block);
 } else {
 ret = postcopy_place_page(mis, place_dest,
-  place_source, block->page_size);
+  place_source, block);
 }
 }
 if (!ret) {
-- 
1.8.3.1




[Qemu-devel] [PATCH v6 0/3] Add bitmap for received pages in postcopy migration

2017-06-28 Thread Alexey Perevalov
This is the 6th version of
[PATCH v1 0/2] Add bitmap for copied pages in postcopy migration;
the cover message from there follows.

This is a separate patch set; it derived from
https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html

There are several possible use cases:
1. Solving the issue with postcopy live migration and shared memory:
OVS-VSWITCH requires information about copied pages, to fallocate
only the newly allocated pages.
2. Calculating vCPU blocktime;
for more details see
https://www.mail-archive.com/qemu-devel@nongnu.org/msg456004.html
3. Recovery after a failure in the middle of postcopy migration

Declarations are placed in two places, include/migration/migration.h and
migration/postcopy-ram.h, because some functions are required in virtio;
a public function also goes into include/exec/ram_addr.h.


V5 -> V6
- call ramblock_recv_map_init from migration_fd_process_incoming (Peter
suggested), but finalization is still in ram_load_cleanup, as Juan suggested.

V4 -> V5
- remove ramblock_recv_bitmap_clear_range in favor of bitmap_clear (comment
from David)
- single invocation place for ramblock_recv_bitmap_set (comment from Peter)
- minor changes like removing a comment from qemu_ufd_copy_ioctl and a local
variable from
ramblock_recv_map_init (comment from Peter)

V3 -> V4
- clear_bit instead of ramblock_recv_bitmap_clear in
ramblock_recv_bitmap_clear_range,
it reduced the number of operations (comment from Juan)
- for postcopy, ramblock_recv_bitmap_set is called after the page was copied,
only in case of success (comment from David)
- indentation fixes (comment from Juan)

V2 -> V3
- the ramblock_recv_map_init call is placed into migration_incoming_get_current;
it looks like the general place for both the precopy and postcopy cases.
- received bitmap memory releasing is placed into ram_load_cleanup;
unfortunately, it is called only in case of precopy.
- precopy case and discard ram block case
- function renaming, and other minor cleanups

V1 -> V2
- change in terminology s/copied/received/g
- granularity became TARGET_PAGE_SIZE, rather than the actual page size of the
ramblock
- movecopiedmap & get_copiedmap_size were removed, until the patch set where
they will be necessary
- releasing memory of receivedmap was added into ram_load_cleanup
- new patch "migration: introduce qemu_ufd_copy_ioctl helper"

Patchset is based on Juan's patchset:
[PATCH v2 0/5] Create setup/cleanup methods for migration incoming side

Alexey Perevalov (3):
  migration: postcopy_place_page factoring out
  migration: introduce qemu_ufd_copy_ioctl helper
  migration: add bitmap for received page

 include/exec/ram_addr.h  | 10 +
 migration/migration.c|  1 +
 migration/postcopy-ram.c | 53 +++-
 migration/postcopy-ram.h |  4 ++--
 migration/ram.c  | 46 -
 migration/ram.h  |  6 ++
 6 files changed, 94 insertions(+), 26 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [PATCH v6 2/3] migration: introduce qemu_ufd_copy_ioctl helper

2017-06-28 Thread Alexey Perevalov
Just for placing auxiliary operations inside a helper;
auxiliary operations like tracking received pages and
notifying about the copy operation come in further patches.

Reviewed-by: Juan Quintela <quint...@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 migration/postcopy-ram.c | 34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index dae41b5..293db97 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -561,6 +561,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 return 0;
 }
 
+static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
+void *from_addr, uint64_t pagesize)
+{
+if (from_addr) {
+struct uffdio_copy copy_struct;
+copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
+copy_struct.src = (uint64_t)(uintptr_t)from_addr;
+copy_struct.len = pagesize;
+copy_struct.mode = 0;
+return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+} else {
+struct uffdio_zeropage zero_struct;
+zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
+zero_struct.range.len = pagesize;
+zero_struct.mode = 0;
+return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+}
+
 /*
  * Place a host page (from) at (host) atomically
  * returns 0 on success
@@ -568,20 +587,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 RAMBlock *rb)
 {
-struct uffdio_copy copy_struct;
 size_t pagesize = qemu_ram_pagesize(rb);
 
-copy_struct.dst = (uint64_t)(uintptr_t)host;
-copy_struct.src = (uint64_t)(uintptr_t)from;
-copy_struct.len = pagesize;
-copy_struct.mode = 0;
-
 /* copy also acks to the kernel waking the stalled thread up
  * TODO: We can inhibit that ack and only do it if it was requested
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -603,12 +616,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-struct uffdio_zeropage zero_struct;
-zero_struct.range.start = (uint64_t)(uintptr_t)host;
-zero_struct.range.len = getpagesize();
-zero_struct.mode = 0;
-
-if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
-- 
1.8.3.1




[Qemu-devel] [PATCH v5 3/3] migration: add bitmap for received page

2017-06-27 Thread Alexey Perevalov
This patch adds the ability to track down already received
pages; it's necessary for calculating vCPU block time in
the postcopy migration feature, and maybe for restore after
a postcopy migration failure.
It is also necessary to solve the shared memory issue in
postcopy live migration. Information about received pages
will be transferred to the software virtual bridge
(e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for
already received pages. The fallocate syscall is required for
remapped shared memory, because remapping itself blocks
ioctl(UFFDIO_COPY); the ioctl in this case will end with an
EEXIST error (the struct page exists after the remap).

The bitmap is placed into RAMBlock alongside the other
postcopy/precopy related bitmaps.

Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
---
 include/exec/ram_addr.h  | 10 ++
 migration/migration.c|  1 +
 migration/postcopy-ram.c | 16 +++-
 migration/ram.c  | 42 +++---
 migration/ram.h  |  6 ++
 5 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 140efa8..4170656 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -47,6 +47,8 @@ struct RAMBlock {
  * of the postcopy phase
  */
 unsigned long *unsentmap;
+/* bitmap of already received pages in postcopy */
+unsigned long *receivedmap;
 };
 
 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
@@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
 return (char *)block->host + offset;
 }
 
+static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
+RAMBlock *rb)
+{
+uint64_t host_addr_offset =
+(uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
+return host_addr_offset >> TARGET_PAGE_BITS;
+}
+
 long qemu_getrampagesize(void);
 unsigned long last_ram_page(void);
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
diff --git a/migration/migration.c b/migration/migration.c
index 71e38bc..53fbd41 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -143,6 +143,7 @@ MigrationIncomingState *migration_incoming_get_current(void)
 qemu_mutex_init(&mis_current.rp_mutex);
 qemu_event_init(&mis_current.main_thread_load_event, false);
 once = true;
+ramblock_recv_map_init();
 }
 return &mis_current;
 }
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 293db97..f980d93 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -562,22 +562,27 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 }
 
 static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
-void *from_addr, uint64_t pagesize)
+   void *from_addr, uint64_t pagesize, RAMBlock *rb)
 {
+int ret;
 if (from_addr) {
 struct uffdio_copy copy_struct;
 copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
 copy_struct.src = (uint64_t)(uintptr_t)from_addr;
 copy_struct.len = pagesize;
 copy_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
 } else {
 struct uffdio_zeropage zero_struct;
 zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
 zero_struct.range.len = pagesize;
 zero_struct.mode = 0;
-return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+}
+if (!ret) {
+ramblock_recv_bitmap_set(host_addr, rb);
 }
+return ret;
 }
 
 /*
@@ -594,7 +599,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
  * which would be slightly cheaper, but we'd have to be careful
  * of the order of updating our page state.
  */
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
 int e = errno;
 error_report("%s: %s copy host: %p from: %p (size: %zd)",
  __func__, strerror(e), host, from, pagesize);
@@ -616,7 +621,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 trace_postcopy_place_page_zero(host);
 
 if (qemu_ram_pagesize(rb) == getpagesize()) {
-if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) {
+if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(),
+rb)) {
 int e = errno;
 error_report("%s: %s zero host: %p",
  __func__, strerror(e), host);
diff --git a/migration/ram.c b/migration/ram.c
index f50479d..95962a0 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -151,6 +151,32 @@ out:
 
