[libvirt] [PATCH v4 2/3] qemu_monitor: Introduce handler for 'query-current-machine' command

2019-04-24 Thread Daniel Henrique Barboza
From: Michal Privoznik 

So far, this command returns a structure with only one member:
'wakeup-suspend-support'. But that's okay. It's what we are after
anyway.

Based-on-work-of: Daniel Henrique Barboza 
Signed-off-by: Michal Privoznik 
---
 src/qemu/qemu_monitor.c  | 10 
 src/qemu/qemu_monitor.h  |  9 +++
 src/qemu/qemu_monitor_json.c | 50 
 src/qemu/qemu_monitor_json.h |  5 
 4 files changed, 74 insertions(+)

diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index babcbde878..e1fcbac13f 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -4472,3 +4472,13 @@ qemuMonitorGetPRManagerInfo(qemuMonitorPtr mon,
 virHashFree(info);
 return ret;
 }
+
+
+int
+qemuMonitorGetCurrentMachineInfo(qemuMonitorPtr mon,
+ qemuMonitorCurrentMachineInfoPtr info)
+{
+QEMU_CHECK_MONITOR(mon);
+
+return qemuMonitorJSONGetCurrentMachineInfo(mon, info);
+}
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index caf62af5e2..9242d37407 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -1221,4 +1221,13 @@ struct _qemuMonitorPRManagerInfo {
 int qemuMonitorGetPRManagerInfo(qemuMonitorPtr mon,
 virHashTablePtr *retinfo);
 
+typedef struct  _qemuMonitorCurrentMachineInfo qemuMonitorCurrentMachineInfo;
+typedef qemuMonitorCurrentMachineInfo *qemuMonitorCurrentMachineInfoPtr;
+struct _qemuMonitorCurrentMachineInfo {
+bool wakeupSuspendSupport;
+};
+
+int qemuMonitorGetCurrentMachineInfo(qemuMonitorPtr mon,
+ qemuMonitorCurrentMachineInfoPtr info);
+
 #endif /* LIBVIRT_QEMU_MONITOR_H */
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index e7d063a5a8..908967f46c 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -8459,3 +8459,53 @@ qemuMonitorJSONGetPRManagerInfo(qemuMonitorPtr mon,
 return ret;
 
 }
+
+
+static int
+qemuMonitorJSONExtractCurrentMachineInfo(virJSONValuePtr reply,
+ qemuMonitorCurrentMachineInfoPtr info)
+{
+virJSONValuePtr data;
+
+data = virJSONValueObjectGetObject(reply, "return");
+if (!data)
+goto malformed;
+
+if (virJSONValueObjectGetBoolean(data, "wakeup-suspend-support",
+ &info->wakeupSuspendSupport) < 0)
+goto malformed;
+
+return 0;
+
+ malformed:
+virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+   _("malformed qemu-current-machine reply"));
+return -1;
+}
+
+
+int
+qemuMonitorJSONGetCurrentMachineInfo(qemuMonitorPtr mon,
+ qemuMonitorCurrentMachineInfoPtr info)
+{
+int ret = -1;
+virJSONValuePtr cmd;
+virJSONValuePtr reply = NULL;
+
+if (!(cmd = qemuMonitorJSONMakeCommand("query-current-machine",
+   NULL)))
+return -1;
+
+if (qemuMonitorJSONCommand(mon, cmd, &reply) < 0)
+goto cleanup;
+
+if (qemuMonitorJSONCheckReply(cmd, reply, VIR_JSON_TYPE_OBJECT) < 0)
+goto cleanup;
+
+ret = qemuMonitorJSONExtractCurrentMachineInfo(reply, info);
+
+ cleanup:
+virJSONValueFree(cmd);
+virJSONValueFree(reply);
+return ret;
+}
diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h
index c10513da15..746b7072ca 100644
--- a/src/qemu/qemu_monitor_json.h
+++ b/src/qemu/qemu_monitor_json.h
@@ -576,4 +576,9 @@ int qemuMonitorJSONGetPRManagerInfo(qemuMonitorPtr mon,
 virHashTablePtr info)
 ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
 
+int
+qemuMonitorJSONGetCurrentMachineInfo(qemuMonitorPtr mon,
+ qemuMonitorCurrentMachineInfoPtr info)
+ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
+
 #endif /* LIBVIRT_QEMU_MONITOR_JSON_H */
-- 
2.20.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v4 1/3] qemu_capabilities: Add QEMU_CAPS_QUERY_CURRENT_MACHINE

2019-04-24 Thread Daniel Henrique Barboza
QEMU commit 46ea94ca9cf ("qmp: query-current-machine with
wakeup-suspend-support") added a new QMP command called
'query-current-machine' that retrieves guest parameters that
can vary in the same machine model (e.g. ACPI support for x86 VMs
depends on the '--no-acpi' option). Currently, this API has a single
flag, 'wakeup-suspend-support', that indicates whether the guest has
the capability of waking up from suspended state.

Introduce a libvirt capability that reflects whether qemu has the
monitor command.

Signed-off-by: Daniel Henrique Barboza 
Signed-off-by: Michal Privoznik 
---
 src/qemu/qemu_capabilities.c  | 2 ++
 src/qemu/qemu_capabilities.h  | 1 +
 tests/qemucapabilitiesdata/caps_4.0.0.riscv32.xml | 1 +
 tests/qemucapabilitiesdata/caps_4.0.0.riscv64.xml | 1 +
 tests/qemucapabilitiesdata/caps_4.0.0.x86_64.xml  | 1 +
 5 files changed, 6 insertions(+)

diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index f8ea66b577..a0b2ca73fb 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -524,6 +524,7 @@ VIR_ENUM_IMPL(virQEMUCaps,
   "scsi-disk.device_id",
   "virtio-pci-non-transitional",
   "overcommit",
+  "query-current-machine",
 );
 
 
@@ -969,6 +970,7 @@ struct virQEMUCapsStringFlags virQEMUCapsCommands[] = {
 { "query-cpus-fast", QEMU_CAPS_QUERY_CPUS_FAST },
 { "qom-list-properties", QEMU_CAPS_QOM_LIST_PROPERTIES },
 { "blockdev-del", QEMU_CAPS_BLOCKDEV_DEL },
+{ "query-current-machine", QEMU_CAPS_QUERY_CURRENT_MACHINE },
 };
 
 struct virQEMUCapsStringFlags virQEMUCapsMigration[] = {
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
index 23ecef8c63..67c8e80462 100644
--- a/src/qemu/qemu_capabilities.h
+++ b/src/qemu/qemu_capabilities.h
@@ -506,6 +506,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for 
syntax-check */
 QEMU_CAPS_SCSI_DISK_DEVICE_ID, /* 'device_id' property of scsi disk */
 QEMU_CAPS_VIRTIO_PCI_TRANSITIONAL, /* virtio *-pci-{non-}transitional 
devices */
 QEMU_CAPS_OVERCOMMIT, /* -overcommit */
+QEMU_CAPS_QUERY_CURRENT_MACHINE, /* query-current-machine command */
 
 QEMU_CAPS_LAST /* this must always be the last item */
 } virQEMUCapsFlags;
diff --git a/tests/qemucapabilitiesdata/caps_4.0.0.riscv32.xml 
b/tests/qemucapabilitiesdata/caps_4.0.0.riscv32.xml
index d6fc996528..2b6aa7b371 100644
--- a/tests/qemucapabilitiesdata/caps_4.0.0.riscv32.xml
+++ b/tests/qemucapabilitiesdata/caps_4.0.0.riscv32.xml
@@ -163,6 +163,7 @@
   
   
   
+  
   3001091
   0
   0
diff --git a/tests/qemucapabilitiesdata/caps_4.0.0.riscv64.xml 
b/tests/qemucapabilitiesdata/caps_4.0.0.riscv64.xml
index fcc34b0ae6..8c0965a634 100644
--- a/tests/qemucapabilitiesdata/caps_4.0.0.riscv64.xml
+++ b/tests/qemucapabilitiesdata/caps_4.0.0.riscv64.xml
@@ -163,6 +163,7 @@
   
   
   
+  
   3001091
   0
   0
diff --git a/tests/qemucapabilitiesdata/caps_4.0.0.x86_64.xml 
b/tests/qemucapabilitiesdata/caps_4.0.0.x86_64.xml
index bc8e35e226..f4be7017fe 100644
--- a/tests/qemucapabilitiesdata/caps_4.0.0.x86_64.xml
+++ b/tests/qemucapabilitiesdata/caps_4.0.0.x86_64.xml
@@ -201,6 +201,7 @@
   
   
   
+  
   3001050
   0
   43100758
-- 
2.20.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v4 0/3] Don't pm suspend guest if it's unable to wake up

2019-04-24 Thread Daniel Henrique Barboza
changes from v3:

- patch 3 removed. We no longer consider PM_WAKEUP_SUPPORT a 
cap. It is confusing to have a 'runtime cap' that can't be resolved 
inside qemu_capabilities.c. The suspend operation isn't time
critical per se, and a call to QMP each time 'virsh dompmsuspend'
is executed is not dreadful to the user experience. More information
and discussion can be found at [1].

- due to the change above, patch 4 now executes the
query-current-machine QMP call (if the running QEMU instance supports
it) during qemuDomainPMSuspendForDuration to check if the guest
has wakeup support.

- previous patch link:

[1] https://www.redhat.com/archives/libvir-list/2019-April/msg00767.html


Daniel Henrique Barboza (2):
  qemu_capabilities: Add QEMU_CAPS_QUERY_CURRENT_MACHINE
  qemuDomainPMSuspendForDuration: check for wake-up support

Michal Prívozník (1):
  qemu_monitor: Introduce handler for 'query-current-machine' command

 src/qemu/qemu_capabilities.c  |  2 +
 src/qemu/qemu_capabilities.h  |  1 +
 src/qemu/qemu_driver.c| 54 +++
 src/qemu/qemu_monitor.c   | 10 
 src/qemu/qemu_monitor.h   |  9 
 src/qemu/qemu_monitor_json.c  | 50 +
 src/qemu/qemu_monitor_json.h  |  5 ++
 .../caps_4.0.0.riscv32.xml|  1 +
 .../caps_4.0.0.riscv64.xml|  1 +
 .../caps_4.0.0.x86_64.xml |  1 +
 10 files changed, 134 insertions(+)

-- 
2.20.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

[libvirt] [PATCH v4 3/3] qemuDomainPMSuspendForDuration: check for wake-up support

2019-04-24 Thread Daniel Henrique Barboza
If the current QEMU guest can't wake up from suspend properly,
and we are able to determine that, avoid suspending the guest
at all. To be able to determine this support, QEMU needs to
implement the 'query-current-machine' QMP call. This is reflected
by the QEMU_CAPS_QUERY_CURRENT_MACHINE cap.

If the cap is enabled, a new function qemuDomainProbeQMPCurrentMachine
is called. This is wrapper for qemuMonitorGetCurrentMachineInfo,
where the 'wakeup-suspend-support' flag is retrieved from
'query-current-machine'. If wakeupSuspendSupport is true,
proceed with the regular flow of qemuDomainPMSuspendForDuration.

The absence of QEMU_CAPS_QUERY_CURRENT_MACHINE indicates that
we're dealing with a QEMU version older than 4.0 (which implements
the required QMP API). In this case, proceed as usual with the
suspend logic of qemuDomainPMSuspendForDuration, since we can't
assume whether the guest has support or not.

Fixes: https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1759509
Reported-by: Balamuruhan S 
Signed-off-by: Daniel Henrique Barboza 
---
 src/qemu/qemu_driver.c | 54 ++
 1 file changed, 54 insertions(+)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 31d8647eee..d584f6ae66 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19145,6 +19145,35 @@ qemuDomainGetCPUStats(virDomainPtr domain,
 return ret;
 }
 
+static int
+qemuDomainProbeQMPCurrentMachine(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
+ bool *enabled)
+{
+qemuMonitorCurrentMachineInfo info = { 0 };
+qemuDomainObjPrivatePtr priv;
+int ret = -1;
+
+*enabled = false;
+priv = vm->privateData;
+
+qemuDomainObjEnterMonitor(driver, vm);
+
+if (qemuMonitorGetCurrentMachineInfo(priv->mon, &info) < 0)
+goto cleanup;
+
+ret = 0;
+
+if (info.wakeupSuspendSupport)
+*enabled = true;
+
+ cleanup:
+if (qemuDomainObjExitMonitor(driver, vm) < 0)
+ret = -1;
+
+return ret;
+}
+
 static int
 qemuDomainPMSuspendForDuration(virDomainPtr dom,
unsigned int target,
@@ -19152,6 +19181,7 @@ qemuDomainPMSuspendForDuration(virDomainPtr dom,
unsigned int flags)
 {
 virQEMUDriverPtr driver = dom->conn->privateData;
+qemuDomainObjPrivatePtr priv;
 virDomainObjPtr vm;
 qemuAgentPtr agent;
 int ret = -1;
@@ -19185,6 +19215,30 @@ qemuDomainPMSuspendForDuration(virDomainPtr dom,
 if (virDomainObjCheckActive(vm) < 0)
 goto endjob;
 
+/*
+ * The case we want to handle here is when QEMU has the API (i.e.
+ * QEMU_CAPS_QUERY_CURRENT_MACHINE is set). Otherwise, do not interfere
+ * with the suspend process. This means that existing running domains,
+ * that don't know about this cap, will keep their old behavior of
+ * suspending 'in the dark'.
+ */
+priv = vm->privateData;
+if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_CURRENT_MACHINE)) {
+bool enabled;
+
+if (qemuDomainProbeQMPCurrentMachine(driver, vm, &enabled) < 0) {
+virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+   _("Error executing query-current-machine call"));
+goto endjob;
+}
+
+if (!enabled) {
+virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
+  _("Domain does not have suspend support"));
+goto endjob;
+}
+}
+
 if (vm->def->pm.s3 || vm->def->pm.s4) {
 if (vm->def->pm.s3 == VIR_TRISTATE_BOOL_NO &&
 (target == VIR_NODE_SUSPEND_TARGET_MEM ||
-- 
2.20.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] Handle copying bitmaps to larger data buffers

2019-04-24 Thread Allen, John
Gentle reminder. Any comments on this patch?

Thanks,
John

On Mon, Apr 15, 2019 at 09:43:07AM -0500, Allen, John wrote:
> If a bitmap of a shorter length than the data buffer is passed to
> virBitmapToDataBuf, it will read off the end of the bitmap and copy junk
> into the returned buffer. Add a check to only copy the length of the
> bitmap to the buffer.
> 
> The problem can be observed after setting a vcpu affinity using the vcpupin
> command on a system with a large number of cores:
>   # virsh vcpupin example_domain 0 0
>   # virsh vcpupin example_domain 0
>  VCPU   CPU Affinity
> ---
>  0  0,192,197-198,202
> 
> Signed-off-by: John Allen 
> ---
> diff --git a/src/util/virbitmap.c b/src/util/virbitmap.c
> index d074f29e54..021174cbb3 100644
> --- a/src/util/virbitmap.c
> +++ b/src/util/virbitmap.c
> @@ -824,11 +824,15 @@ virBitmapToDataBuf(virBitmapPtr bitmap,
> unsigned char *bytes,
> size_t len)
>  {
> +size_t nbytes = bitmap->map_len * (VIR_BITMAP_BITS_PER_UNIT / CHAR_BIT);
>  unsigned long *l;
>  size_t i, j;
>  
>  memset(bytes, 0, len);
>  
> +/* If bitmap and buffer differ in size, only fill to the smaller length 
> */
> +len = MIN(len, nbytes);
> +
>  /* htole64 is not provided by gnulib, so we do the conversion by hand */
>  l = bitmap->map;
>  for (i = j = 0; i < len; i++, j++) {

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v8 03/21] backup: Introduce virDomainCheckpoint APIs

2019-04-24 Thread Eric Blake
On 4/24/19 8:26 AM, Peter Krempa wrote:
> On Wed, Apr 17, 2019 at 09:09:03 -0500, Eric Blake wrote:
>> Introduce a bunch of new public APIs related to backup checkpoints.
>> Checkpoints are modeled heavily after virDomainSnapshotPtr (both
>> represent a point in time of the guest), although a snapshot exists
>> with the intent of rolling back to that state, while a checkpoint
>> exists to make it possible to create an incremental backup at a later
>> time.
>>
>> The following map shows the API relations to snapshots, with new APIs
>> on the right:
>>
>> Operate on a domain object to create/redefine a child:
>> virDomainSnapshotCreateXML  virDomainCheckpointCreateXML
>>
>> Operate on a child object for lifetime management:
>> virDomainSnapshotDelete virDomainCheckpointDelete
>> virDomainSnapshotFree   virDomainCheckpointFree
>> virDomainSnapshotRefvirDomainCheckpointRef
>>
>> Operate on a child object to learn more about it:
>> virDomainSnapshotGetXMLDesc virDomainCheckpointGetXMLDesc
>> virDomainSnapshotGetConnect virDomainCheckpointGetConnect
>> virDomainSnapshotGetDomain  virDomainCheckpointGetDomain
>> virDomainSnapshotGetNamevirDomainCheckpiontGetName
>> virDomainSnapshotGetParent  virDomainCheckpiontGetParent
>> virDomainSnapshotHasMetadatavirDomainCheckpointHasMetadata
>> virDomainSnapshotIsCurrent  virDomainCheckpointIsCurrent
> 
> The 'current' checkpoint has very sparse documentation. While it makes
> some sense for the current snapshot to exist I'm not persuaded we need
> this for checkpoints.
> 
> In case of checkpoints the bitmaps backing it track the state even if
> you create a new bitmap unlike checkpoints. Thus it does not seem to
> make sense to me.
> 
> If you think it does please elaborate, ideally in form of documentation.

I can add documentation, but the short answer is that there are two ways
to implement differential backups:

1. One active bitmap for every point in time that you might ever need a
differential backup from; if you have 20 checkpoints, then you have 20
active bitmaps, and each guest write has to potentially modify all 20
bitmaps, but backups can directly use the desired bitmap and nothing
else for that particular backup:

Time:T1T2 T3 T4   present
Bitmap1: 
Bitmap2:   --
Bitmap3:  ---
Bitmap4: 

2. One disabled bitmap for every delta between two consecutive points in
time, plus one active bitmap since the most recent point in time; if you
have 20 checkpoints, then you have 19 disabled bitmap and 1 active 1,
where each guest write only has to modify 1 bitmap, but backups have to
construct a temporary bitmap that contains the union of all bitmaps that
span the time in question:

Time:T1T2 T3 T4   present
Bitmap1: --
Bitmap2:   ---
Bitmap3:  ---
Bitmap4: 

Given that guest writes are more frequent than backup operations, and
that performing the creation of a temporary bitmap to merge together
other bitmaps at the start of a bitmap is not all that onerous, the code
goes with approach 2. And, since there is always 1 active bitmap while
the rest are disabled, it lends itself well to the notion of a current
checkpoint (one that is tracking active guest writes).

> 
>> Operate on a domain object to list all children:
>> virDomainSnapshotNum(no counterpart, this is the old
>> virDomainSnapshotListNames   racy interface)
>> virDomainSnapshotListAllSnapshots   virDomainListAllCheckpoints
>>
>> Operate on a child object to list descendents:
>> virDomainSnapshotNumChildren(no counterpart, this is the old
>> virDomainSnapshotListChildrenNames   racy interface)
>> virDomainSnapshotListAllChildrenvirDomainCheckpointListAllChildren
>>
>> Operate on a domain to locate a particular child:
>> virDomainSnapshotLookupByName   virDomainCheckpointLookupByName
>> virDomainHasCurrentSnapshot virDomainHasCurrentCheckpoint
>> virDomainSnapshotCurrentvirDomainCheckpointCurrent
>>
>> Operate on a snapshot to roll back to earlier state:
>> virDomainSnapshotRevert (no counterpart, instead checkpoints
>>  are used in incremental backups via
> 
> This patch or a different one should also add docs to virDomainSnapshotRevert
> that outline what happens to the checkpoints when reverting snapshots.

Right now, it needs to be an error (until we actually design how the two
should play together nicely, the most conservative approach is to state
that reverting a domain with checkpoints risks enough confusion to be
forbidden in the initial implementation, even if we lift that
restriction later when we iron out how it should work).

> 
> Basically what should happen is that an alternate real

[libvirt] [PATCH v2] test_driver: provide virDomainGetTime implementation

2019-04-24 Thread Ilias Stamatis
Implement testDomainGetTime by returning a fixed timestamp.

Signed-off-by: Ilias Stamatis 
---
 src/test/test_driver.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/test/test_driver.c b/src/test/test_driver.c
index d5eecf4b7f..9b78c5c2ca 100644
--- a/src/test/test_driver.c
+++ b/src/test/test_driver.c
@@ -1943,6 +1943,18 @@ testDomainGetState(virDomainPtr domain,
 return 0;
 }
 
+static int
+testDomainGetTime(virDomainPtr dom ATTRIBUTE_UNUSED,
+  long long *seconds,
+  unsigned int *nseconds,
+  unsigned int flags ATTRIBUTE_UNUSED)
+{
+*seconds = 627319920;
+*nseconds = 0;
+
+return 0;
+}
+
 #define TEST_SAVE_MAGIC "TestGuestMagic"
 
 static int
@@ -6786,6 +6798,7 @@ static virHypervisorDriver testHypervisorDriver = {
 .domainSetMemory = testDomainSetMemory, /* 0.1.4 */
 .domainGetInfo = testDomainGetInfo, /* 0.1.1 */
 .domainGetState = testDomainGetState, /* 0.9.2 */
+.domainGetTime = testDomainGetTime, /* 5.3.0 */
 .domainSave = testDomainSave, /* 0.3.2 */
 .domainSaveFlags = testDomainSaveFlags, /* 0.9.4 */
 .domainRestore = testDomainRestore, /* 0.3.2 */
-- 
2.21.0

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] conf: add cpu check attribute to ABI check

2019-04-24 Thread Jiri Denemark
On Wed, Apr 24, 2019 at 12:56:58 +0300, Nikolay Shirokovskiy wrote:
> Different check values is not ABI compatible. For example
> if on migration we change 'full' to 'partial' then guest cpu
> on destination can be different.
> 
> Signed-off-by: Nikolay Shirokovskiy 

Reviewed-by: Jiri Denemark 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v8 06/21] backup: Parse and output checkpoint XML

2019-04-24 Thread Peter Krempa
On Wed, Apr 17, 2019 at 09:09:06 -0500, Eric Blake wrote:
> Add a new file checkpoint_conf.c that performs the translation to and
> from new XML describing a checkpoint. The code shares a common base
> class with snapshots, since a checkpoint similarly represents the
> domain state at a moment in time. Add some basic testing of round trip
> XML handling through the new code.
> 
> Signed-off-by: Eric Blake 
> ---
>  src/conf/checkpoint_conf.h|  96 +++
>  src/conf/virconftypes.h   |   3 +
>  po/POTFILES   |   1 +
>  src/conf/Makefile.inc.am  |   2 +
>  src/conf/checkpoint_conf.c| 550 ++
>  src/libvirt_private.syms  |   8 +
>  tests/Makefile.am |   9 +-
>  .../internal-active-invalid.xml   |  53 ++
>  .../internal-inactive-invalid.xml |  53 ++
>  tests/domaincheckpointxml2xmltest.c   | 223 +++
>  10 files changed, 996 insertions(+), 2 deletions(-)
>  create mode 100644 src/conf/checkpoint_conf.h
>  create mode 100644 src/conf/checkpoint_conf.c
>  create mode 100644 
> tests/domaincheckpointxml2xmlout/internal-active-invalid.xml
>  create mode 100644 
> tests/domaincheckpointxml2xmlout/internal-inactive-invalid.xml
>  create mode 100644 tests/domaincheckpointxml2xmltest.c
> 
> diff --git a/src/conf/checkpoint_conf.h b/src/conf/checkpoint_conf.h
> new file mode 100644
> index 00..6647d0cd7c
> --- /dev/null
> +++ b/src/conf/checkpoint_conf.h
> @@ -0,0 +1,96 @@
> +/*
> + * checkpoint_conf.h: domain checkpoint XML processing
> + * (based on snapshot_conf.h)
> + *
> + * Copyright (C) 2006-2019 Red Hat, Inc.
> + * Copyright (C) 2006-2008 Daniel P. Berrange
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library.  If not, see
> + * .
> + */
> +
> +#ifndef LIBVIRT_CHECKPOINT_CONF_H
> +# define LIBVIRT_CHECKPOINT_CONF_H
> +
> +# include "internal.h"
> +# include "domain_conf.h"
> +# include "moment_conf.h"
> +
> +/* Items related to checkpoint state */
> +
> +typedef enum {
> +VIR_DOMAIN_CHECKPOINT_TYPE_DEFAULT = 0,
> +VIR_DOMAIN_CHECKPOINT_TYPE_NONE,
> +VIR_DOMAIN_CHECKPOINT_TYPE_BITMAP,
> +
> +VIR_DOMAIN_CHECKPOINT_TYPE_LAST
> +} virDomainCheckpointType;
> +
> +/* Stores disk-checkpoint information */
> +typedef struct _virDomainCheckpointDiskDef virDomainCheckpointDiskDef;
> +typedef virDomainCheckpointDiskDef *virDomainCheckpointDiskDefPtr;
> +struct _virDomainCheckpointDiskDef {
> +char *name; /* name matching the  +int idx;/* index within checkpoint->dom->disks that matches name 
> */
> +int type;   /* virDomainCheckpointType */
> +char *bitmap;   /* bitmap name, if type is bitmap */
> +unsigned long long size; /* current checkpoint size in bytes */
> +};
> +
> +/* Stores the complete checkpoint metadata */
> +struct _virDomainCheckpointDef {
> +virDomainMomentDef common;
> +
> +/* Additional Public XML.  */
> +size_t ndisks; /* should not exceed dom->ndisks */
> +virDomainCheckpointDiskDef *disks;
> +};
> +
> +
> +typedef enum {
> +VIR_DOMAIN_CHECKPOINT_PARSE_REDEFINE = 1 << 0,
> +VIR_DOMAIN_CHECKPOINT_PARSE_INTERNAL = 1 << 1,

These allow an easy pass to sneak validation into the parser ... I'm not
a fan of that.

> +} virDomainCheckpointParseFlags;
> +
> +typedef enum {
> +VIR_DOMAIN_CHECKPOINT_FORMAT_SECURE= 1 << 0,
> +VIR_DOMAIN_CHECKPOINT_FORMAT_NO_DOMAIN = 1 << 1,
> +VIR_DOMAIN_CHECKPOINT_FORMAT_SIZE  = 1 << 2,
> +VIR_DOMAIN_CHECKPOINT_FORMAT_INTERNAL  = 1 << 3,
> +VIR_DOMAIN_CHECKPOINT_FORMAT_CURRENT   = 1 << 4,
> +} virDomainCheckpointFormatFlags;

[...]

> +
> +static int
> +virDomainCheckpointDiskDefParseXML(xmlNodePtr node,
> +   xmlXPathContextPtr ctxt,
> +   virDomainCheckpointDiskDefPtr def)
> +{
> +int ret = -1;
> +char *checkpoint = NULL;
> +char *bitmap = NULL;
> +xmlNodePtr saved = ctxt->node;
> +
> +ctxt->node = node;
> +
> +def->name = virXMLPropString(node, "name");
> +if (!def->name) {
> +virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +   _("missing name from disk checkpoint element"));
> +goto cleanup;
>

Re: [libvirt] [PATCH v8 05/21] backup: Document nuances between different state capture APIs

2019-04-24 Thread Peter Krempa
On Wed, Apr 17, 2019 at 09:09:05 -0500, Eric Blake wrote:
> Now that various new API have been added, it is worth a landing page
> that gives an overview of capturing various pieces of guest state, and
> which APIs are best suited to which tasks.
> 
> Signed-off-by: Eric Blake 
> Reviewed-by: John Ferlan 
> Reviewed-by: Daniel P. Berrangé 
> ---
>  docs/docs.html.in   |   5 +
>  docs/domainstatecapture.html.in | 315 
>  docs/formatbackup.html.in   |   4 +-
>  docs/formatcheckpoint.html.in   |   4 +-
>  docs/formatsnapshot.html.in |   2 +
>  5 files changed, 328 insertions(+), 2 deletions(-)
>  create mode 100644 docs/domainstatecapture.html.in
> 
> diff --git a/docs/docs.html.in b/docs/docs.html.in
> index 400b149791..8c08ace402 100644
> --- a/docs/docs.html.in
> +++ b/docs/docs.html.in
> @@ -124,6 +124,11 @@
> 
>  Secure usage
>  Secure usage of the libvirt APIs
> +
> +Domain state
> +capture
> +Comparison between different methods of capturing domain
> +  state
>
>  
> 
> diff --git a/docs/domainstatecapture.html.in b/docs/domainstatecapture.html.in
> new file mode 100644
> index 00..1d29d70e76
> --- /dev/null
> +++ b/docs/domainstatecapture.html.in
> @@ -0,0 +1,315 @@
> +
> +
> +http://www.w3.org/1999/xhtml";>
> +  
> +
> +Domain state capture using Libvirt
> +
> +
> +
> +
> +  In order to aid application developers to choose which
> +  operations best suit their needs, this page compares the
> +  different means for capturing state related to a domain managed
> +  by libvirt.
> +
> +
> +
> +  The information here is primarily geared towards capturing the
> +  state of an active domain. Capturing the state of an inactive
> +  domain essentially amounts to copying the contents of guest
> +  disks, followed by a fresh boot with disks restored to that
> +  state.
> +
> +
> +State capture trade-offs
> +
> +One of the features made possible with virtual machines is live
> +  migration -- transferring all state related to the guest from
> +  one host to another with minimal interruption to the guest's
> +  activity. In this case, state includes domain memory (including
> +  register and device contents), and domain storage (whether the
> +  guest's view of the disks are backed by local storage on the
> +  host, or by the hypervisor accessing shared storage over a
> +  network).  A clever observer will then note that if all state is
> +  available for live migration, then there is nothing stopping a
> +  user from saving some or all of that state at a given point of
> +  time in order to be able to later rewind guest execution back to
> +  the state it previously had. The astute reader will also realize
> +  that state capture at any level requires that the data must be
> +  stored and managed by some mechanism. This processing might fit
> +  in a single file, or more likely require a chain of related
> +  files, and may require synchronization with third-party tools
> +  built around managing the amount of data resulting from
> +  capturing the state of multiple guests that each use multiple
> +  disks.
> +
> +
> +
> +  There are several libvirt APIs associated with capturing the
> +  state of a guest, which can later be used to rewind that guest
> +  to the conditions it was in earlier.  The following is a list of
> +  trade-offs and differences between the various facets that
> +  affect capturing domain state for active domains:
> +
> +
> +
> +  Duration
> +  Capturing state can be a lengthy process, so while the
> +captured state ideally represents an atomic point in time
> +corresponding to something the guest was actually executing,
> +capturing state tends to focus on minimizing guest downtime
> +while performing the rest of the state capture in parallel
> +with guest execution.  Some interfaces require up-front
> +preparation (the state captured is not complete until the API
> +ends, which may be some time after the command was first
> +started), while other interfaces track the state when the
> +command was first issued, regardless of the time spent in
> +capturing the rest of the state.  Also, time spent in state
> +capture may be longer than the time required for live
> +migration, when state must be duplicated rather than shared.
> +  
> +
> +  Amount of state
> +  For an online guest, there is a choice between capturing the
> +guest's memory (all that is needed during live migration when
> +the storage is already shared between source and destination),
> +the guest's disk state (all that is needed if there are no
> +pending guest I/O transactions that would

Re: [libvirt] [PATCH 1/2] vfio/mdev: add version field as mandatory attribute for mdev device

2019-04-24 Thread Alex Williamson
On Tue, 23 Apr 2019 23:39:34 -0400
Yan Zhao  wrote:

> On Tue, Apr 23, 2019 at 11:02:56PM +0800, Alex Williamson wrote:
> > On Tue, 23 Apr 2019 01:41:57 -0400
> > Yan Zhao  wrote:
> >   
> > > On Tue, Apr 23, 2019 at 09:21:00AM +0800, Alex Williamson wrote:  
> > > > On Mon, 22 Apr 2019 21:01:52 -0400
> > > > Yan Zhao  wrote:
> > > >  
> > > > > On Mon, Apr 22, 2019 at 10:39:50PM +0800, Alex Williamson wrote:  
> > > > > > On Fri, 19 Apr 2019 04:35:04 -0400
> > > > > > Yan Zhao  wrote:
> > > > > >  
> > > > > > > device version attribute in mdev sysfs is used by user space 
> > > > > > > software
> > > > > > > (e.g. libvirt) to query device compatibility for live migration 
> > > > > > > of VFIO
> > > > > > > mdev devices. This attribute is mandatory if a mdev device 
> > > > > > > supports live
> > > > > > > migration.  
> > > > > >
> > > > > > The Subject: doesn't quite match what's being proposed here.
> > > > > >  
> > > > > > > It consists of two parts: common part and vendor proprietary part.
> > > > > > > common part: 32 bit. lower 16 bits is vendor id and higher 16 bits
> > > > > > >  identifies device type. e.g., for pci device, it is
> > > > > > >  "pci vendor id" | (VFIO_DEVICE_FLAGS_PCI << 16).  
> > > > > >
> > > > > > What purpose does this serve?  If it's intended as some sort of
> > > > > > namespace feature, shouldn't we first assume that we can only 
> > > > > > support
> > > > > > migration to devices of the same type?  Therefore each type would
> > > > > > already have its own namespace.  Also that would make the trailing 
> > > > > > bit
> > > > > > of the version string listed below in the example redundant.  A 
> > > > > > vendor
> > > > > > is still welcome to include this in their version string if they 
> > > > > > wish,
> > > > > > but I think the string should be entirely vendor defined.
> > > > > >  
> > > > > hi Alex,
> > > > > This common part is a kind of namespace.
> > > > > Because if version string is entirely defined by vendors, I'm worried 
> > > > > about
> > > > > if there is a case that one vendor's version string happens to 
> > > > > deceive and
> > > > > interfere with another vendor's version checking?
> > > > > e.g.
> > > > > vendor A has a version string like: vendor id + device id + mdev type
> > > > > vendor B has a version string like: device id + vendor id + mdev type
> > > > > but vendor A's vendor id is 0x8086, device id is 0x1217
> > > > > vendor B's vendor id is 0x1217, device id is 0x8086.
> > > > >
> > > > > In this corner case, the two vendors may regard the two device is
> > > > > migratable but actually they are not.
> > > > >
> > > > > That's the reason for this common part that serve as a kind of 
> > > > > namespace
> > > > > that all vendors will comply with to avoid overlap.  
> > > >
> > > > If we assume that migration can only occur between matching mdev types,
> > > > this is redundant, each type already has their own namespace.
> > > >  
> > > hi Alex,
> > > do you mean user space software like libvirt needs to first check whether
> > > mdev type is matching and then check whether version is matching?  
> > 
> > Yes.
> >  
> may I know the drawback of defining the this common part?
>   common part: 32 bit. lower 16 bits is vendor id and higher 16 bits
>  identifies device type. e.g., for pci device, it is
>  "pci vendor id" | (VFIO_DEVICE_FLAGS_PCI << 16).
> By doing so, user space software has no need to first check whether mdev type
> is matching.  A confident vendor driver can even allow devices migrating
> between different mdev types.


It's not practical to expect userspace to test the version of every
mdev type in the system to determine a match, let alone across a data
center.  Additionally, in order to be migration compatible the mdev
types must be software compatible to the VM, which is the basic
definition of the differences between mdev types.  Therefore let me
flip the question around, why would a vendor driver choose to create a
new mdev type for software compatible devices?  If the vendor wants to
maintain compatibility, indicate basic compatibility using the same
mdev type.  The original intention of the version attribute is to be
entirely opaque to userspace, introducing "common" parts is unnecessary
as above, degrades the original concept, and only defines a solution for
PCI devices. Thanks,

Alex

> > > if user space software only checks version for migration, it means vendor
> > > driver has to include mdev type in their vendor proprietary part string,
> > > right?  
> > 
> > Userspace attempting to migrate an nvidia-64 to an i915-GVT_V5_4 would
> > be a failure on the part of the user.
> >   
> > > Another thing is that could there be any future mdev parent driver that
> > > applies to all mdev devices, just like vfio-pci? like Yi's vfio-pci-mdev
> > > driver (https://lkml.org/lkml/2019/3/13/114)?  
> > 
> > For starters, this is just a sample driver from which vendor

Re: [libvirt] [PATCH v8 04/21] backup: Introduce virDomainBackup APIs

2019-04-24 Thread Peter Krempa
On Wed, Apr 17, 2019 at 09:09:04 -0500, Eric Blake wrote:
> Introduce a few new public APIs related to incremental backups.  This
> builds on the previous notion of a checkpoint (without an existing
> checkpoint, the new API is a full backup, differing from
> virDomainBlockCopy in the point of time chosen and in operation on
> multiple disks at once); and also allows creation of a new checkpoint
> at the same time as starting the backup (after all, an incremental
> backup is only useful if it covers the state since the previous
> backup).  Snapshot creation is also a point in time at which creating
> a checkpoint atomically can be useful; as checkpoints are independent
> objects, it is not worth embedding  inside
> , and therefore we need a more powerful version of
> virDomainSnapshotCreateXML(), where we borrow from the naming pattern
> of virDomainMigrate2() and friends.
> 
> A backup job also affects filtering a listing of domains, as well as
> adding event reporting for signaling when a push model backup
> completes (where the hypervisor creates the backup); note that the
> pull model does not have an event (starting the backup lets a third
> party access the data, and only the third party knows when it is
> finished).
> 
> Since multiple backup jobs can be run in parallel in the future (well,
> qemu doesn't support it yet, but we don't want to preclude the idea),
> virDomainBackupBegin() returns a positive job id, and the id is also
> visible in the backup XML. But until a future libvirt release adds a
> bunch of APIs related to parallel job management where job ids will
> actually matter, the documentation is also clear that job id 0 means
> the 'currently running backup job' (provided one exists), for use in
> virDomainBackupGetXMLDesc() and virDomainBackupEnd().
> 
> The full list of new APIs:
> virDomainBackupBegin;
> virDomainBackupEnd;
> virDomainBackupGetXMLDesc;
> virDomainSnapshotCreateXML2;
> 
> Signed-off-by: Eric Blake 
> Reviewed-by: Daniel P. Berrangé 
> ---
>  include/libvirt/libvirt-domain-snapshot.h |   8 +-
>  include/libvirt/libvirt-domain.h  |  41 +++-
>  src/driver-hypervisor.h   |  21 +++
>  src/qemu/qemu_blockjob.h  |   1 +
>  examples/object-events/event-test.c   |   3 +
>  src/conf/domain_conf.c|   2 +-
>  src/libvirt-domain-snapshot.c |  89 +
>  src/libvirt-domain.c  | 219 ++
>  src/libvirt_public.syms   |   4 +
>  tools/virsh-domain.c  |   8 +-
>  10 files changed, 390 insertions(+), 6 deletions(-)

[...]

> 
> @@ -78,6 +78,12 @@ virDomainSnapshotPtr 
> virDomainSnapshotCreateXML(virDomainPtr domain,
>  const char *xmlDesc,
>  unsigned int flags);
> 
> +/* Take a snapshot of the current VM state, possibly creating a checkpoint */
> +virDomainSnapshotPtr virDomainSnapshotCreateXML2(virDomainPtr domain,
> + const char *xmlDesc,
> + const char *checkpointXml,
> + unsigned int flags);
> +

This feels weird in this patch. It does not deal with backup in any way. It 
would
be better to have this separately or at least together with checkpoint
creating.

>  typedef enum {
>  VIR_DOMAIN_SNAPSHOT_XML_SECURE = VIR_DOMAIN_XML_SECURE, /* dump 
> security sensitive information too */
>  } virDomainSnapshotXMLFlags;

[...]

> @@ -1376,6 +1382,17 @@ typedef int
>  (*virDrvDomainCheckpointDelete)(virDomainCheckpointPtr checkpoint,
>  unsigned int flags);
> 
> +typedef int
> +(*virDrvDomainBackupBegin)(virDomainPtr domain, const char *diskXml,
> +   const char *checkpointXml, unsigned int flags);
> +
> +typedef char *
> +(*virDrvDomainBackupGetXMLDesc)(virDomainPtr domain, int id,
> +unsigned int flags);
> +
> +typedef int
> +(*virDrvDomainBackupEnd)(virDomainPtr domain, int id, unsigned int flags);

Since the getter APIs are overloaded on top of shouldn't this be
overloaded with qemuDomainAbortJob as well?

> +
>  typedef struct _virHypervisorDriver virHypervisorDriver;
>  typedef virHypervisorDriver *virHypervisorDriverPtr;

[...]


> diff --git a/src/qemu/qemu_blockjob.h b/src/qemu/qemu_blockjob.h
> index c7325c6daf..95f53dde16 100644
> --- a/src/qemu/qemu_blockjob.h
> +++ b/src/qemu/qemu_blockjob.h
> @@ -55,6 +55,7 @@ typedef enum {
>  QEMU_BLOCKJOB_TYPE_COPY = VIR_DOMAIN_BLOCK_JOB_TYPE_COPY,
>  QEMU_BLOCKJOB_TYPE_COMMIT = VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT,
>  QEMU_BLOCKJOB_TYPE_ACTIVE_COMMIT = 
> VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT,
> +QEMU_BLOCKJOB_TYPE_BACKUP = VIR_DOMAIN_BLOCK_JOB_TYPE_BACKUP,

Here it also tries to behave like a block job. Th

[libvirt] [PATCH] vircgroup: no need to ifdef virCgroupFree

2019-04-24 Thread Pavel Hrdina
virCgroup struct is always defined and the free function is not calling
anything that would require OS supporting cgroups.

This fixes an issue if we try to start a VM with QEMU binary that
doesn't support QXL.  The start operation will fail in
qemuProcessStartValidateVideo() which will set correct error message,
but later in one of the cleanup paths we will call
qemuDomainObjPrivateDataClear() which always calls virCgroupFree()
and that will fail on OS that doesn't support cgroups and it will
set a new error which will be eventually reported to user.

Signed-off-by: Pavel Hrdina 
---
 src/util/vircgroup.c | 62 +++-
 1 file changed, 27 insertions(+), 35 deletions(-)

diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 4238d7014b..f58e336404 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -1279,33 +1279,6 @@ virCgroupNewIgnoreError(void)
 }
 
 
-/**
- * virCgroupFree:
- *
- * @group: The group structure to free
- */
-void
-virCgroupFree(virCgroupPtr *group)
-{
-size_t i;
-
-if (*group == NULL)
-return;
-
-for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
-VIR_FREE((*group)->legacy[i].mountPoint);
-VIR_FREE((*group)->legacy[i].linkPoint);
-VIR_FREE((*group)->legacy[i].placement);
-}
-
-VIR_FREE((*group)->unified.mountPoint);
-VIR_FREE((*group)->unified.placement);
-
-VIR_FREE((*group)->path);
-VIR_FREE(*group);
-}
-
-
 /**
  * virCgroupHasController: query whether a cgroup controller is present
  *
@@ -2917,14 +2890,6 @@ virCgroupNewIgnoreError(void)
 }
 
 
-void
-virCgroupFree(virCgroupPtr *group ATTRIBUTE_UNUSED)
-{
-virReportSystemError(ENXIO, "%s",
- _("Control groups not supported on this platform"));
-}
-
-
 bool
 virCgroupHasController(virCgroupPtr cgroup ATTRIBUTE_UNUSED,
int controller ATTRIBUTE_UNUSED)
@@ -3565,6 +3530,33 @@ virCgroupControllerAvailable(int controller 
ATTRIBUTE_UNUSED)
 #endif /* !__linux__ */
 
 
+/**
+ * virCgroupFree:
+ *
+ * @group: The group structure to free
+ */
+void
+virCgroupFree(virCgroupPtr *group)
+{
+size_t i;
+
+if (*group == NULL)
+return;
+
+for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
+VIR_FREE((*group)->legacy[i].mountPoint);
+VIR_FREE((*group)->legacy[i].linkPoint);
+VIR_FREE((*group)->legacy[i].placement);
+}
+
+VIR_FREE((*group)->unified.mountPoint);
+VIR_FREE((*group)->unified.placement);
+
+VIR_FREE((*group)->path);
+VIR_FREE(*group);
+}
+
+
 int
 virCgroupDelThread(virCgroupPtr cgroup,
virCgroupThreadName nameval,
-- 
2.20.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v8 03/21] backup: Introduce virDomainCheckpoint APIs

2019-04-24 Thread Peter Krempa
On Wed, Apr 17, 2019 at 09:09:03 -0500, Eric Blake wrote:
> Introduce a bunch of new public APIs related to backup checkpoints.
> Checkpoints are modeled heavily after virDomainSnapshotPtr (both
> represent a point in time of the guest), although a snapshot exists
> with the intent of rolling back to that state, while a checkpoint
> exists to make it possible to create an incremental backup at a later
> time.
> 
> The following map shows the API relations to snapshots, with new APIs
> on the right:
> 
> Operate on a domain object to create/redefine a child:
> virDomainSnapshotCreateXML  virDomainCheckpointCreateXML
> 
> Operate on a child object for lifetime management:
> virDomainSnapshotDelete virDomainCheckpointDelete
> virDomainSnapshotFree   virDomainCheckpointFree
> virDomainSnapshotRefvirDomainCheckpointRef
> 
> Operate on a child object to learn more about it:
> virDomainSnapshotGetXMLDesc virDomainCheckpointGetXMLDesc
> virDomainSnapshotGetConnect virDomainCheckpointGetConnect
> virDomainSnapshotGetDomain  virDomainCheckpointGetDomain
> virDomainSnapshotGetNamevirDomainCheckpiontGetName
> virDomainSnapshotGetParent  virDomainCheckpiontGetParent
> virDomainSnapshotHasMetadatavirDomainCheckpointHasMetadata
> virDomainSnapshotIsCurrent  virDomainCheckpointIsCurrent

The 'current' checkpoint has very sparse documentation. While it makes
some sense for the current snapshot to exist I'm not persuaded we need
this for checkpoints.

In case of checkpoints the bitmaps backing it track the state even if
you create a new bitmap unlike checkpoints. Thus it does not seem to
make sense to me.

If you think it does please elaborate, ideally in form of documentation.

> Operate on a domain object to list all children:
> virDomainSnapshotNum(no counterpart, this is the old
> virDomainSnapshotListNames   racy interface)
> virDomainSnapshotListAllSnapshots   virDomainListAllCheckpoints
> 
> Operate on a child object to list descendents:
> virDomainSnapshotNumChildren(no counterpart, this is the old
> virDomainSnapshotListChildrenNames   racy interface)
> virDomainSnapshotListAllChildrenvirDomainCheckpointListAllChildren
> 
> Operate on a domain to locate a particular child:
> virDomainSnapshotLookupByName   virDomainCheckpointLookupByName
> virDomainHasCurrentSnapshot virDomainHasCurrentCheckpoint
> virDomainSnapshotCurrentvirDomainCheckpointCurrent
> 
> Operate on a snapshot to roll back to earlier state:
> virDomainSnapshotRevert (no counterpart, instead checkpoints
>  are used in incremental backups via

This patch or a different one should also add docs to virDomainSnapshotRevert
that outline what happens to the checkpoints when reverting snapshots.

Basically what should happen is that an alternate reality should
probably be created as the disk images can change at that point


>XML to virDomainBackupBegin)
> 
> Signed-off-by: Eric Blake 
> Reviewed-by: Daniel P. Berrangé 


signature.asc
Description: PGP signature
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

[libvirt] [PATCH] conf: add cpu check attribute to ABI check

2019-04-24 Thread Nikolay Shirokovskiy
Different check values is not ABI compatible. For example
if on migration we change 'full' to 'partial' then guest cpu
on destination can be different.

Signed-off-by: Nikolay Shirokovskiy 
---
 src/conf/cpu_conf.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c
index 33c8b99..bd2beab 100644
--- a/src/conf/cpu_conf.c
+++ b/src/conf/cpu_conf.c
@@ -894,6 +894,13 @@ virCPUDefIsEqual(virCPUDefPtr src,
 goto cleanup;
 }
 
+if (src->check != dst->check) {
+MISMATCH(_("Target CPU check %s does not match source %s"),
+ virCPUCheckTypeToString(dst->check),
+ virCPUCheckTypeToString(src->check));
+goto cleanup;
+}
+
 if (src->arch != dst->arch) {
 MISMATCH(_("Target CPU arch %s does not match source %s"),
  virArchToString(dst->arch),
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [Qemu-devel] [PATCH 1/2] vfio/mdev: add version field as mandatory attribute for mdev device

2019-04-24 Thread Christophe de Dinechin


> On 23 Apr 2019, at 12:39, Daniel P. Berrangé  wrote:
> 
> On Fri, Apr 19, 2019 at 04:35:04AM -0400, Yan Zhao wrote:
>> device version attribute in mdev sysfs is used by user space software
>> (e.g. libvirt) to query device compatibility for live migration of VFIO
>> mdev devices. This attribute is mandatory if a mdev device supports live
>> migration.
>> 
>> It consists of two parts: common part and vendor proprietary part.
>> common part: 32 bit. lower 16 bits is vendor id and higher 16 bits
>> identifies device type. e.g., for pci device, it is
>> "pci vendor id" | (VFIO_DEVICE_FLAGS_PCI << 16).
>> vendor proprietary part: this part is varied in length. vendor driver can
>> specify any string to identify a device.
>> 
>> When reading this attribute, it should show device version string of the
>> device of type . If a device does not support live migration, it
>> should return errno.
>> When writing a string to this attribute, it returns errno for
>> incompatibility or returns written string length in compatibility case.
>> If a device does not support live migration, it always returns errno.
>> 
>> For user space software to use:
>> 1.
>> Before starting live migration, user space software first reads source side
>> mdev device's version. e.g.
>> "#cat \
>> /sys/bus/pci/devices/\:00\:02.0/5ac1fb20-2bbf-4842-bb7e-36c58c3be9cd/mdev_type/version"
>> 00028086-193b-i915-GVTg_V5_4
>> 
>> 2.
>> Then, user space software writes the source side returned version string
>> to device version attribute in target side, and checks the return value.
>> If a negative errno is returned in the target side, then mdev devices in
>> source and target sides are not compatible;
>> If a positive number is returned and it equals to the length of written
>> string, then the two mdev devices in source and target side are compatible.
>> e.g.
>> (a) compatibility case
>> "# echo 00028086-193b-i915-GVTg_V5_4 >
>> /sys/bus/pci/devices/\:00\:02.0/882cc4da-dede-11e7-9180-078a62063ab1/mdev_type/version"
>> 
>> (b) incompatibility case
>> "#echo 00028086-193b-i915-GVTg_V5_1 >
>> /sys/bus/pci/devices/\:00\:02.0/882cc4da-dede-11e7-9180-078a62063ab1/mdev_type/version"
>> -bash: echo: write error: Invalid argument
> 
> What you have written here seems to imply that each mdev type is able to
> support many different versions at the same time. Writing a version into
> this sysfs file then chooses which of the many versions to actually use.
> 
> This is good as it allows for live migration across driver software upgrades.
> 
> A mgmt application may well want to know what versions are supported for an
> mdev type *before* starting a migration. A mgmt app can query all the 100's
> of hosts it knows and thus figure out which are valid to use as the target
> of a migration.
> 
> IOW, we want to avoid the ever hitting the incompatibility case in the
> first place, by only choosing to migrate to a host that we know is going
> to be compatible.
> 
> This would need some kind of way to report the full list of supported
> versions against the mdev supported types on the host.
> 
> 
>> 3. if two mdev devices are compatible, user space software can start
>> live migration, and vice versa.
>> 
>> Note: if a mdev device does not support live migration, it either does
>> not provide a version attribute, or always returns errno when its version
>> attribute is read/written.
>> 
>> Cc: Alex Williamson 
>> Cc: Erik Skultety 
>> Cc: "Dr. David Alan Gilbert" 
>> Cc: Cornelia Huck 
>> Cc: "Tian, Kevin" 
>> Cc: Zhenyu Wang 
>> Cc: "Wang, Zhi A" 
>> Cc: Neo Jia 
>> Cc: Kirti Wankhede 
>> 
>> Signed-off-by: Yan Zhao 
>> ---
>> Documentation/vfio-mediated-device.txt | 36 ++
>> samples/vfio-mdev/mbochs.c | 17 
>> samples/vfio-mdev/mdpy.c   | 16 
>> samples/vfio-mdev/mtty.c   | 16 
>> 4 files changed, 85 insertions(+)
>> 
>> diff --git a/Documentation/vfio-mediated-device.txt 
>> b/Documentation/vfio-mediated-device.txt
>> index c3f69bcaf96e..bc28471c0667 100644
>> --- a/Documentation/vfio-mediated-device.txt
>> +++ b/Documentation/vfio-mediated-device.txt
>> @@ -202,6 +202,7 @@ Directories and files under the sysfs for Each Physical 
>> Device
>>   | |   |--- available_instances
>>   | |   |--- device_api
>>   | |   |--- description
>> +  | |   |--- version
>>   | |   |--- [devices]
>>   | |--- []
>>   | |   |--- create
>> @@ -209,6 +210,7 @@ Directories and files under the sysfs for Each Physical 
>> Device
>>   | |   |--- available_instances
>>   | |   |--- device_api
>>   | |   |--- description
>> +  | |   |--- version
>>   | |   |--- [devices]
>>   | |--- []
>>   |  |--- create
>> @@ -216,6 +218,7 @@ Directories and files under the sysfs for Each Physical 
>> Device
>>   |  |--- available_instances
>>   |  |--- device_api
>>   |  |--- des

Re: [libvirt] [PATCH 1/2] vfio/mdev: add version field as mandatory attribute for mdev device

2019-04-24 Thread Yan Zhao
On Wed, Apr 24, 2019 at 03:56:24PM +0800, Cornelia Huck wrote:
> On Tue, 23 Apr 2019 23:10:37 -0400
> Yan Zhao  wrote:
> 
> > On Tue, Apr 23, 2019 at 05:59:32PM +0800, Cornelia Huck wrote:
> > > On Fri, 19 Apr 2019 04:35:04 -0400
> > > Yan Zhao  wrote:
> > >   
> > > > device version attribute in mdev sysfs is used by user space software
> > > > (e.g. libvirt) to query device compatibility for live migration of VFIO
> > > > mdev devices. This attribute is mandatory if a mdev device supports live
> > > > migration.
> > > >
> > > > It consists of two parts: common part and vendor proprietary part.
> > > > common part: 32 bit. lower 16 bits is vendor id and higher 16 bits
> > > >  identifies device type. e.g., for pci device, it is
> > > >  "pci vendor id" | (VFIO_DEVICE_FLAGS_PCI << 16).
> > > > vendor proprietary part: this part is varied in length. vendor driver 
> > > > can
> > > >  specify any string to identify a device.
> > > >
> > > > When reading this attribute, it should show device version string of the
> > > > device of type . If a device does not support live migration, 
> > > > it
> > > > should return errno.  
> > > 
> > > It might make more sense if the driver does not register the attribute
> > > for the device in that case at all.
> > >   
> > yes. what about rephrase like this:
> > "
> > When reading this attribute, it should show device version string of the
> > device of type .
> > If a device does not support live migration, it has two choices:
> > 1. do not register this version attribute
> > 2. return -ENODEV on rw this version attribute
> 
> "return -ENODEV when accessing the version attribute" ?
Yeah, this one is better, thanks :)

> > Choice 1 is preferred.
> > "
> > 
> > 
> > > > When writing a string to this attribute, it returns errno for
> > > > incompatibility or returns written string length in compatibility case.
> > > > If a device does not support live migration, it always returns errno.
> > > >
> > > > For user space software to use:
> > > > 1.
> > > > Before starting live migration, user space software first reads source 
> > > > side
> > > > mdev device's version. e.g.
> > > > "#cat \
> > > > /sys/bus/pci/devices/\:00\:02.0/5ac1fb20-2bbf-4842-bb7e-36c58c3be9cd/mdev_type/version"
> > > > 00028086-193b-i915-GVTg_V5_4
> > > >
> > > > 2.
> > > > Then, user space software writes the source side returned version string
> > > > to device version attribute in target side, and checks the return value.
> > > > If a negative errno is returned in the target side, then mdev devices in
> > > > source and target sides are not compatible;
> > > > If a positive number is returned and it equals to the length of written
> > > > string, then the two mdev devices in source and target side are 
> > > > compatible.
> > > > e.g.
> > > > (a) compatibility case
> > > > "# echo 00028086-193b-i915-GVTg_V5_4 >
> > > > /sys/bus/pci/devices/\:00\:02.0/882cc4da-dede-11e7-9180-078a62063ab1/mdev_type/version"
> > > >
> > > > (b) incompatibility case
> > > > "#echo 00028086-193b-i915-GVTg_V5_1 >
> > > > /sys/bus/pci/devices/\:00\:02.0/882cc4da-dede-11e7-9180-078a62063ab1/mdev_type/version"
> > > > -bash: echo: write error: Invalid argument
> > > >
> > > > 3. if two mdev devices are compatible, user space software can start
> > > > live migration, and vice versa.
> > > >
> > > > Note: if a mdev device does not support live migration, it either does
> > > > not provide a version attribute, or always returns errno when its 
> > > > version
> > > > attribute is read/written.
> > > >
> > > > Cc: Alex Williamson 
> > > > Cc: Erik Skultety 
> > > > Cc: "Dr. David Alan Gilbert" 
> > > > Cc: Cornelia Huck 
> > > > Cc: "Tian, Kevin" 
> > > > Cc: Zhenyu Wang 
> > > > Cc: "Wang, Zhi A" 
> > > > Cc: Neo Jia 
> > > > Cc: Kirti Wankhede 
> > > >
> > > > Signed-off-by: Yan Zhao 
> > > > ---
> > > >  Documentation/vfio-mediated-device.txt | 36 ++
> > > >  samples/vfio-mdev/mbochs.c | 17 
> > > >  samples/vfio-mdev/mdpy.c   | 16 
> > > >  samples/vfio-mdev/mtty.c   | 16 
> > > >  4 files changed, 85 insertions(+)
> > > >
> > > > diff --git a/Documentation/vfio-mediated-device.txt 
> > > > b/Documentation/vfio-mediated-device.txt
> > > > index c3f69bcaf96e..bc28471c0667 100644
> > > > --- a/Documentation/vfio-mediated-device.txt
> > > > +++ b/Documentation/vfio-mediated-device.txt
> > > > @@ -202,6 +202,7 @@ Directories and files under the sysfs for Each 
> > > > Physical Device
> > > >| |   |--- available_instances
> > > >| |   |--- device_api
> > > >| |   |--- description
> > > > +  | |   |--- version
> > > >| |   |--- [devices]
> > > >| |--- []
> > > >| |   |--- create
> > > > @@ -209,6 +210,7 @@ Directories and files under the sysfs for Each 
> > > > Physical Device
> > > >| |   |--- available_instances
> > > >|

Re: [libvirt] [PATCH 00/12] qemu: Refactor image permission/lock setting (blockdev-add saga)

2019-04-24 Thread Peter Krempa
On Thu, Apr 18, 2019 at 14:30:04 -0500, Eric Blake wrote:
> On 4/18/19 9:42 AM, Peter Krempa wrote:
> > With new blockjob handling we'll need to modify permissions for chains
> > and individual images. The individual image code was universally
> > accessible but the chain setting code reimplemented it mostly only in
> > qemu_hotplug.h.
> > 
> > Refactor the handling by moving the code to qemu_domain.c and making it
> > universal.
> 
> Oh fun - competes with my work to get incremental backup in. But from
> the subject lines alone, it seems reasonable; I can see how hard it is
> to rebase my code on top of yours.

These are the necessary changes which are needed in:

backup: Wire up qemu full pull backup commands over QMP

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index c9f1996b2a..d3307bfc4f 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -17796,7 +17796,7 @@ qemuDomainBackupDiskCleanup(virQEMUDriverPtr driver, 
virDomainObjPtr vm,
 ret = -1;
 }
 if (disk->state >= VIR_DOMAIN_BACKUP_DISK_STATE_LABEL)
-qemuDomainDiskChainElementRevoke(driver, vm, disk->store);
+qemuDomainStorageSourceAccessRevoke(driver, vm, disk->store);
 if ((!push || !completed) &&
 disk->state >= VIR_DOMAIN_BACKUP_DISK_STATE_CREATED &&
 disk->store->detected && unlink(disk->store->path) < 0) {
@@ -17997,8 +17997,8 @@ qemuDomainBackupBegin(virDomainPtr domain, const char 
*diskXml,
 }
 disk->state = VIR_DOMAIN_BACKUP_DISK_STATE_CREATED;
 }
-if (qemuDomainDiskChainElementPrepare(driver, vm, disk->store, false,
-  true) < 0)
+if (qemuDomainStorageSourceAccessAllow(driver, vm, disk->store, false,
+   true) < 0)
 goto endmon;
 disk->state = VIR_DOMAIN_BACKUP_DISK_STATE_LABEL;
 if (disk->store->detected) {


signature.asc
Description: PGP signature
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH 1/2] vfio/mdev: add version field as mandatory attribute for mdev device

2019-04-24 Thread Cornelia Huck
On Tue, 23 Apr 2019 23:10:37 -0400
Yan Zhao  wrote:

> On Tue, Apr 23, 2019 at 05:59:32PM +0800, Cornelia Huck wrote:
> > On Fri, 19 Apr 2019 04:35:04 -0400
> > Yan Zhao  wrote:
> >   
> > > device version attribute in mdev sysfs is used by user space software
> > > (e.g. libvirt) to query device compatibility for live migration of VFIO
> > > mdev devices. This attribute is mandatory if a mdev device supports live
> > > migration.
> > >
> > > It consists of two parts: common part and vendor proprietary part.
> > > common part: 32 bit. lower 16 bits is vendor id and higher 16 bits
> > >  identifies device type. e.g., for pci device, it is
> > >  "pci vendor id" | (VFIO_DEVICE_FLAGS_PCI << 16).
> > > vendor proprietary part: this part is varied in length. vendor driver can
> > >  specify any string to identify a device.
> > >
> > > When reading this attribute, it should show device version string of the
> > > device of type . If a device does not support live migration, it
> > > should return errno.  
> > 
> > It might make more sense if the driver does not register the attribute
> > for the device in that case at all.
> >   
> yes. what about rephrase like this:
> "
> When reading this attribute, it should show device version string of the
> device of type .
> If a device does not support live migration, it has two choices:
> 1. do not register this version attribute
> 2. return -ENODEV on rw this version attribute

"return -ENODEV when accessing the version attribute" ?

> Choice 1 is preferred.
> "
> 
> 
> > > When writing a string to this attribute, it returns errno for
> > > incompatibility or returns written string length in compatibility case.
> > > If a device does not support live migration, it always returns errno.
> > >
> > > For user space software to use:
> > > 1.
> > > Before starting live migration, user space software first reads source 
> > > side
> > > mdev device's version. e.g.
> > > "#cat \
> > > /sys/bus/pci/devices/\:00\:02.0/5ac1fb20-2bbf-4842-bb7e-36c58c3be9cd/mdev_type/version"
> > > 00028086-193b-i915-GVTg_V5_4
> > >
> > > 2.
> > > Then, user space software writes the source side returned version string
> > > to device version attribute in target side, and checks the return value.
> > > If a negative errno is returned in the target side, then mdev devices in
> > > source and target sides are not compatible;
> > > If a positive number is returned and it equals to the length of written
> > > string, then the two mdev devices in source and target side are 
> > > compatible.
> > > e.g.
> > > (a) compatibility case
> > > "# echo 00028086-193b-i915-GVTg_V5_4 >
> > > /sys/bus/pci/devices/\:00\:02.0/882cc4da-dede-11e7-9180-078a62063ab1/mdev_type/version"
> > >
> > > (b) incompatibility case
> > > "#echo 00028086-193b-i915-GVTg_V5_1 >
> > > /sys/bus/pci/devices/\:00\:02.0/882cc4da-dede-11e7-9180-078a62063ab1/mdev_type/version"
> > > -bash: echo: write error: Invalid argument
> > >
> > > 3. if two mdev devices are compatible, user space software can start
> > > live migration, and vice versa.
> > >
> > > Note: if a mdev device does not support live migration, it either does
> > > not provide a version attribute, or always returns errno when its version
> > > attribute is read/written.
> > >
> > > Cc: Alex Williamson 
> > > Cc: Erik Skultety 
> > > Cc: "Dr. David Alan Gilbert" 
> > > Cc: Cornelia Huck 
> > > Cc: "Tian, Kevin" 
> > > Cc: Zhenyu Wang 
> > > Cc: "Wang, Zhi A" 
> > > Cc: Neo Jia 
> > > Cc: Kirti Wankhede 
> > >
> > > Signed-off-by: Yan Zhao 
> > > ---
> > >  Documentation/vfio-mediated-device.txt | 36 ++
> > >  samples/vfio-mdev/mbochs.c | 17 
> > >  samples/vfio-mdev/mdpy.c   | 16 
> > >  samples/vfio-mdev/mtty.c   | 16 
> > >  4 files changed, 85 insertions(+)
> > >
> > > diff --git a/Documentation/vfio-mediated-device.txt 
> > > b/Documentation/vfio-mediated-device.txt
> > > index c3f69bcaf96e..bc28471c0667 100644
> > > --- a/Documentation/vfio-mediated-device.txt
> > > +++ b/Documentation/vfio-mediated-device.txt
> > > @@ -202,6 +202,7 @@ Directories and files under the sysfs for Each 
> > > Physical Device
> > >| |   |--- available_instances
> > >| |   |--- device_api
> > >| |   |--- description
> > > +  | |   |--- version
> > >| |   |--- [devices]
> > >| |--- []
> > >| |   |--- create
> > > @@ -209,6 +210,7 @@ Directories and files under the sysfs for Each 
> > > Physical Device
> > >| |   |--- available_instances
> > >| |   |--- device_api
> > >| |   |--- description
> > > +  | |   |--- version
> > >| |   |--- [devices]
> > >| |--- []
> > >|  |--- create
> > > @@ -216,6 +218,7 @@ Directories and files under the sysfs for Each 
> > > Physical Device
> > >|  |--- available_instances
> > >|  |--- device_a