Introduce capability switchover_abort and migration parameter switchover_limit
to allow for live migration abort when the source downtime exceeded by
switchover_limit.

Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com>
---
 hw/core/machine.c                  |  1 +
 include/migration/client-options.h |  1 +
 migration/migration-hmp-cmds.c     | 10 ++++++
 migration/migration.c              | 39 +++++++++++++++++++++
 migration/migration.h              |  5 +++
 migration/options.c                | 56 ++++++++++++++++++++++++++++++
 migration/options.h                |  1 +
 migration/savevm.c                 | 13 +++++++
 qapi/migration.json                | 27 +++++++++++---
 9 files changed, 149 insertions(+), 4 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 655d75c21f..9459c7adbb 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -38,6 +38,7 @@ GlobalProperty hw_compat_9_0[] = {
     {"arm-cpu", "backcompat-cntfrq", "true" },
     {"scsi-disk-base", "migrate-emulated-scsi-request", "false" },
     {"vfio-pci", "skip-vsc-check", "false" },
+    { "migration", "x-switchover-abort", "off" },
 };
 const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0);
 
diff --git a/include/migration/client-options.h 
b/include/migration/client-options.h
index 59f4b55cf4..0e9d17f507 100644
--- a/include/migration/client-options.h
+++ b/include/migration/client-options.h
@@ -16,6 +16,7 @@ bool migrate_background_snapshot(void);
 bool migrate_dirty_limit(void);
 bool migrate_postcopy_ram(void);
 bool migrate_switchover_ack(void);
+bool migrate_switchover_abort(void);
 
 /* parameters */
 
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 9f0e8029e0..4dc8d0ba87 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -312,6 +312,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict 
*qdict)
         monitor_printf(mon, "%s: '%s'\n",
             MigrationParameter_str(MIGRATION_PARAMETER_TLS_AUTHZ),
             params->tls_authz);
+        assert(params->has_switchover_limit);
+        monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_SWITCHOVER_LIMIT),
+            params->switchover_limit);
+
 
         if (params->has_block_bitmap_mapping) {
             const BitmapMigrationNodeAliasList *bmnal;
@@ -624,6 +629,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
*qdict)
         p->has_mode = true;
         visit_type_MigMode(v, param, &p->mode, &err);
         break;
+    case MIGRATION_PARAMETER_SWITCHOVER_LIMIT:
+        p->has_switchover_limit = true;
+        visit_type_size(v, param, &p->switchover_limit, &err);
+        break;
+
     default:
         assert(0);
     }
diff --git a/migration/migration.c b/migration/migration.c
index e1b269624c..5cc304d2db 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -113,6 +113,7 @@ static void migration_downtime_start(MigrationState *s)
 {
     trace_vmstate_downtime_checkpoint("src-downtime-start");
     s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    s->downtime_now = s->downtime_start;
 }
 
 static void migration_downtime_end(MigrationState *s)
@@ -204,6 +205,10 @@ static int migration_stop_vm(MigrationState *s, RunState 
state)
     trace_vmstate_downtime_checkpoint("src-vm-stopped");
     trace_migration_completion_vm_stop(ret);
 
+    if (migration_downtime_exceeded()) {
+        migration_set_downtime_exceeded_error(s, s->to_dst_file);
+        ret = -1;
+    }
     return ret;
 }
 
@@ -1652,6 +1657,7 @@ int migrate_init(MigrationState *s, Error **errp)
     s->mbps = 0.0;
     s->pages_per_second = 0.0;
     s->downtime = 0;
+    s->downtime_now = 0;
     s->expected_downtime = 0;
     s->setup_time = 0;
     s->start_postcopy = false;
@@ -2758,6 +2764,39 @@ static void migration_completion_failed(MigrationState 
*s,
                       MIGRATION_STATUS_FAILED);
 }
 
+int64_t migration_get_current_downtime(MigrationState *s)
+{
+    s->downtime_now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+    return s->downtime_now - s->downtime_start;
+}
+
+bool migration_downtime_exceeded(void)
+{
+    MigrationState *s = migrate_get_current();
+
+    if (!migrate_switchover_abort()) {
+        return 0;
+    }
+
+    return migration_get_current_downtime(s) >= s->parameters.downtime_limit +
+                                                s->parameters.switchover_limit;
+}
+
+int migration_set_downtime_exceeded_error(MigrationState *s, QEMUFile *f)
+{
+    int64_t limit = s->parameters.downtime_limit;
+    Error *errp = NULL;
+
+    error_setg(&errp, "Downtime Limit of %" PRIi64" ms exceeded by %"PRIi64" 
ms",
+               limit, (s->downtime_now - s->downtime_start) - limit);
+
+    migration_cancel(errp);
+    error_free(errp);
+
+    return -EFAULT;
+}
+
 /**
  * migration_completion: Used by migration_thread when there's not much left.
  *   The caller 'breaks' the loop when this returns.
diff --git a/migration/migration.h b/migration/migration.h
index 6af01362d4..aa56b70795 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -338,6 +338,8 @@ struct MigrationState {
     /* Timestamp when VM is down (ms) to migrate the last stuff */
     int64_t downtime_start;
     int64_t downtime;
+    /* Current measured downtime on source */
+    int64_t downtime_now;
     int64_t expected_downtime;
     bool capabilities[MIGRATION_CAPABILITY__MAX];
     int64_t setup_time;
@@ -519,6 +521,9 @@ void migration_consume_urgent_request(void);
 bool migration_rate_limit(void);
 void migration_bh_schedule(QEMUBHFunc *cb, void *opaque);
 void migration_cancel(const Error *error);
+int64_t migration_get_current_downtime(MigrationState *s);
+int migration_set_downtime_exceeded_error(MigrationState *s, QEMUFile *f);
+bool migration_downtime_exceeded(void);
 
 void migration_populate_vfio_info(MigrationInfo *info);
 void migration_reset_vfio_bytes_transferred(void);
diff --git a/migration/options.c b/migration/options.c
index 5ab5b6d85d..a1a22d389c 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -40,6 +40,13 @@
  * for sending the last part */
 #define DEFAULT_MIGRATE_SET_DOWNTIME 300
 
+/*
+ * Time in milliseconds that downtime can exceed downtime limit
+ * on source or destination before migration aborts if capability
+ * switchover_abort is enabled
+ */
+#define DEFAULT_MIGRATE_SET_SWITCHOVER_LIMIT 0
+
 /* Define default autoconverge cpu throttle migration parameters */
 #define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
 #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
@@ -162,6 +169,9 @@ Property migration_properties[] = {
     DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
                        parameters.zero_page_detection,
                        ZERO_PAGE_DETECTION_MULTIFD),
+    DEFINE_PROP_UINT64("x-switchover-limit", MigrationState,
+                       parameters.switchover_limit,
+                       DEFAULT_MIGRATE_SET_SWITCHOVER_LIMIT),
 
     /* Migration capabilities */
     DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -184,6 +194,8 @@ Property migration_properties[] = {
 #endif
     DEFINE_PROP_MIG_CAP("x-switchover-ack",
                         MIGRATION_CAPABILITY_SWITCHOVER_ACK),
+    DEFINE_PROP_MIG_CAP("x-switchover-abort",
+                        MIGRATION_CAPABILITY_SWITCHOVER_ABORT),
     DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
     DEFINE_PROP_MIG_CAP("mapped-ram", MIGRATION_CAPABILITY_MAPPED_RAM),
     DEFINE_PROP_END_OF_LIST(),
@@ -315,6 +327,13 @@ bool migrate_switchover_ack(void)
     return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ACK];
 }
 
+bool migrate_switchover_abort(void)
+{
+    MigrationState *s = migrate_get_current();
+
+    return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ABORT];
+}
+
 bool migrate_validate_uuid(void)
 {
     MigrationState *s = migrate_get_current();
@@ -592,6 +611,14 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, 
Error **errp)
         }
     }
 
+    if (new_caps[MIGRATION_CAPABILITY_SWITCHOVER_ABORT]) {
+        if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) {
+            error_setg(errp, "Capability 'switchover-abort' requires 
capability "
+                             "'return-path'");
+            return false;
+        }
+    }
+
     return true;
 }
 
@@ -824,6 +851,13 @@ ZeroPageDetection migrate_zero_page_detection(void)
     return s->parameters.zero_page_detection;
 }
 
+void migrate_set_switchover_limit(uint64_t value)
+{
+    MigrationState *s = migrate_get_current();
+
+   s->parameters.switchover_limit = value;
+}
+
 /* parameters helpers */
 
 AnnounceParameters *migrate_announce_params(void)
@@ -905,6 +939,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
     params->mode = s->parameters.mode;
     params->has_zero_page_detection = true;
     params->zero_page_detection = s->parameters.zero_page_detection;
+    params->has_switchover_limit = true;
+    params->switchover_limit = s->parameters.switchover_limit;
 
     return params;
 }
@@ -937,6 +973,7 @@ void migrate_params_init(MigrationParameters *params)
     params->has_vcpu_dirty_limit = true;
     params->has_mode = true;
     params->has_zero_page_detection = true;
+    params->has_switchover_limit = true;
 }
 
 /*
@@ -1110,6 +1147,15 @@ bool migrate_params_check(MigrationParameters *params, 
Error **errp)
         return false;
     }
 
+    if (params->has_switchover_limit &&
+        (params->switchover_limit > MAX_MIGRATE_DOWNTIME)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   "switchover_limit",
+                   "an integer in the range of 0 to "
+                    stringify(MAX_MIGRATE_DOWNTIME)" ms");
+        return false;
+    }
+
     return true;
 }
 
@@ -1216,6 +1262,11 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
     if (params->has_zero_page_detection) {
         dest->zero_page_detection = params->zero_page_detection;
     }
+
+    if (params->has_switchover_limit) {
+        dest->switchover_limit = params->switchover_limit;
+    }
+
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1341,6 +1392,11 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
     if (params->has_zero_page_detection) {
         s->parameters.zero_page_detection = params->zero_page_detection;
     }
+
+    if (params->has_switchover_limit) {
+        s->parameters.switchover_limit = params->switchover_limit;
+    }
+
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
diff --git a/migration/options.h b/migration/options.h
index 4b21cc2669..7c57789970 100644
--- a/migration/options.h
+++ b/migration/options.h
@@ -84,6 +84,7 @@ const char *migrate_tls_creds(void);
 const char *migrate_tls_hostname(void);
 uint64_t migrate_xbzrle_cache_size(void);
 ZeroPageDetection migrate_zero_page_detection(void);
+void migrate_set_switchover_limit(uint64_t value);
 
 /* parameters helpers */
 
diff --git a/migration/savevm.c b/migration/savevm.c
index c621f2359b..031ab03915 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1492,6 +1492,7 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile 
*f, bool in_postcopy)
 {
     int64_t start_ts_each, end_ts_each;
     SaveStateEntry *se;
+    MigrationState *s = migrate_get_current();
     int ret;
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
@@ -1523,6 +1524,11 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile 
*f, bool in_postcopy)
         end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
         trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
                                     end_ts_each - start_ts_each);
+        if (migration_downtime_exceeded()) {
+            if (migration_set_downtime_exceeded_error(s, f)) {
+                return -1;
+            }
+        }
     }
 
     trace_vmstate_downtime_checkpoint("src-iterable-saved");
@@ -1561,6 +1567,13 @@ int 
qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
         end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
         trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
                                     end_ts_each - start_ts_each);
+
+        if (migration_downtime_exceeded()) {
+            if (migration_set_downtime_exceeded_error(ms, f)) {
+                return -1;
+            }
+        }
+
     }
 
     if (inactivate_disks) {
diff --git a/qapi/migration.json b/qapi/migration.json
index 470f746cc5..069a44f207 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -469,6 +469,10 @@
 #     each RAM page.  Requires a migration URI that supports seeking,
 #     such as a file.  (since 9.0)
 #
+# @switchover-abort: abort migration if downtime exceeds the downtime
+#     limit configured by the specified value by switchover-limit
+#     migration parameter.
+#
 # Features:
 #
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -485,7 +489,7 @@
            { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
            'validate-uuid', 'background-snapshot',
            'zero-copy-send', 'postcopy-preempt', 'switchover-ack',
-           'dirty-limit', 'mapped-ram'] }
+           'dirty-limit', 'mapped-ram', 'switchover-abort'] }
 
 ##
 # @MigrationCapabilityStatus:
@@ -821,6 +825,10 @@
 #     See description in @ZeroPageDetection.  Default is 'multifd'.
 #     (since 9.0)
 #
+# @switchover-limit: Switchover limit (ms) that would be used to
+#     intiate abort of live migration if the total switchover time
+#     exceeded downtime_limit + switchover_limit (Since 9.1)
+#
 # Features:
 #
 # @unstable: Members @x-checkpoint-delay and
@@ -845,7 +853,8 @@
            { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
            'vcpu-dirty-limit',
            'mode',
-           'zero-page-detection'] }
+           'zero-page-detection',
+           'switchover-limit'] }
 
 ##
 # @MigrateSetParameters:
@@ -991,6 +1000,10 @@
 #     See description in @ZeroPageDetection.  Default is 'multifd'.
 #     (since 9.0)
 #
+# @switchover-limit: Switchover limit (ms) that would be used to
+#     intiate abort of live migration if the total switchover time
+#     exceeded downtime_limit + switchover_limit (Since 9.1)
+#
 # Features:
 #
 # @unstable: Members @x-checkpoint-delay and
@@ -1030,7 +1043,8 @@
                                             'features': [ 'unstable' ] },
             '*vcpu-dirty-limit': 'uint64',
             '*mode': 'MigMode',
-            '*zero-page-detection': 'ZeroPageDetection'} }
+            '*zero-page-detection': 'ZeroPageDetection',
+            '*switchover-limit': 'uint64'} }
 
 ##
 # @migrate-set-parameters:
@@ -1190,6 +1204,10 @@
 #     See description in @ZeroPageDetection.  Default is 'multifd'.
 #     (since 9.0)
 #
+# @switchover-limit: Switchover limit (ms) that would be used to
+#     intiate abort of live migration if the total switchover time
+#     exceeded downtime_limit + switchover_limit (Since 9.1)
+#
 # Features:
 #
 # @unstable: Members @x-checkpoint-delay and
@@ -1226,7 +1244,8 @@
                                             'features': [ 'unstable' ] },
             '*vcpu-dirty-limit': 'uint64',
             '*mode': 'MigMode',
-            '*zero-page-detection': 'ZeroPageDetection'} }
+            '*zero-page-detection': 'ZeroPageDetection',
+            '*switchover-limit': 'uint64'} }
 
 ##
 # @query-migrate-parameters:
-- 
2.34.1


Reply via email to