Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-14 Thread Laine Stump
On 05/13/2015 10:02 PM, Chen Fan wrote:

 On 05/13/2015 10:30 PM, Laine Stump wrote:
 On 05/13/2015 05:57 AM, Daniel P. Berrange wrote:
 On Wed, May 13, 2015 at 11:36:30AM +0800, Chen Fan wrote:
 add migration support for ephemeral host devices, introduce
 two 'detach' and 'restore' functions to unplug/plug host devices
 during migration.

 Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
 ---
   src/qemu/qemu_migration.c | 171
 --
   src/qemu/qemu_migration.h |   9 +++
   src/qemu/qemu_process.c   |  11 +++
   3 files changed, 187 insertions(+), 4 deletions(-)

 diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
 index 56112f9..d5a698f 100644
 --- a/src/qemu/qemu_migration.c
 +++ b/src/qemu/qemu_migration.c
 +void
 +qemuMigrationRestoreEphemeralDevices(virQEMUDriverPtr driver,
 + virConnectPtr conn,
 + virDomainObjPtr vm,
 + bool live)
 +{
 +qemuDomainObjPrivatePtr priv = vm-privateData;
 +virDomainDeviceDefPtr dev;
 +int ret = -1;
 +size_t i;
 +
 +VIR_DEBUG(Rum domain restore ephemeral devices);
 +
 +for (i = 0; i  priv-nEphemeralDevices; i++) {
 +dev = priv-ephemeralDevices[i];
 +
 +switch ((virDomainDeviceType) dev-type) {
 +case VIR_DOMAIN_DEVICE_NET:
 +if (live) {
 +ret = qemuDomainAttachNetDevice(conn, driver, vm,
 +dev-data.net);
 +} else {
 +ret = virDomainNetInsert(vm-def, dev-data.net);
 +}
 +
 +if (!ret)
 +dev-data.net = NULL;
 +break;
 +case VIR_DOMAIN_DEVICE_HOSTDEV:
 +if (live) {
 +ret = qemuDomainAttachHostDevice(conn, driver, vm,
 + dev-data.hostdev);
 +   } else {
 +ret =virDomainHostdevInsert(vm-def,
 dev-data.hostdev);
 +}
 This re-attach step is where we actually have far far far worse
 problems
 than with detach. This is blindly assuming that the guest on the target
 host can use the same hostdev that it was using on the source host.
 (kind of pointless to comment on, since pkrempa has changed my opinion
 by forcing me to think about the failure to reattach condition, but
 could be useful info for others)

 For a hostdev, yes, but not for interface type='network' (which
 would point to a libvirt network pool of VFs).

 This
 is essentially useless in the real world.
 Agreed (for plain hostdev)

 Even if the same vendor/model
 device is available on the target host, it is very unlikely to be
 available
 at the same bus/slot/function that it was on the source. It is quite
 likely
 neccessary to allocate a complete different NIC, or if using SRIOV
 allocate
 a different function. It is also not uncommon to have different
 vendor/models,
 so a completely different NIC may be required.
 In the case of a network device, a different brand/model of NIC at a
 different PCI address using a different guest driver shouldn't be a
 problem for the guest, as long as the MAC address is the same (for a
 Linux guest anyway; not sure what a Windows guest would do with a NIC
 that had the same MAC but used a different driver). This points out the
 folly of trying to do migration with attached hostdevs (managed at *any*
 level), for anything other than SRIOV VFs (which can have their MAC
 address set before attach, unlike non-SRIOV NICs).

 .
 So should we focus on implementing the feature that support migration
 with SRIOV
 VFs at first?

Not just at first -- only. Requiring that code to deal properly with a
MAC address change in the guest adds a lot of complexity for not much
real gain.

And based on my newfound realization of the horrible situation that
would be created by a failure to re-attach after migration was complete
(see my response to Peter Krempa yesterday), I now agree with Dan that
this shouldn't be implemented in libvirt, but in the higher level
management, which will be able to more easily/realistically  deal with
such a failure.

(and by the way, I think I should apologize for leading you down the
road of the ephemeral patches in response to your earlier RFC. If only
I'd fully considered the post-migration re-attach failure case, and the
difficulty libvirt would have recovering from that prior to Peter
pointing it out so eloquently yesterday :-/)

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-14 Thread Kamezawa Hiroyuki

On 2015/05/14 17:38, Daniel P. Berrange wrote:

On Thu, May 14, 2015 at 10:02:39AM +0800, Chen Fan wrote:


On 05/13/2015 10:30 PM, Laine Stump wrote:

On 05/13/2015 05:57 AM, Daniel P. Berrange wrote:

On Wed, May 13, 2015 at 11:36:30AM +0800, Chen Fan wrote:

add migration support for ephemeral host devices, introduce
two 'detach' and 'restore' functions to unplug/plug host devices
during migration.

Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
---
  src/qemu/qemu_migration.c | 171 --
  src/qemu/qemu_migration.h |   9 +++
  src/qemu/qemu_process.c   |  11 +++
  3 files changed, 187 insertions(+), 4 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 56112f9..d5a698f 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
+void
+qemuMigrationRestoreEphemeralDevices(virQEMUDriverPtr driver,
+ virConnectPtr conn,
+ virDomainObjPtr vm,
+ bool live)
+{
+qemuDomainObjPrivatePtr priv = vm-privateData;
+virDomainDeviceDefPtr dev;
+int ret = -1;
+size_t i;
+
+VIR_DEBUG(Rum domain restore ephemeral devices);
+
+for (i = 0; i  priv-nEphemeralDevices; i++) {
+dev = priv-ephemeralDevices[i];
+
+switch ((virDomainDeviceType) dev-type) {
+case VIR_DOMAIN_DEVICE_NET:
+if (live) {
+ret = qemuDomainAttachNetDevice(conn, driver, vm,
+dev-data.net);
+} else {
+ret = virDomainNetInsert(vm-def, dev-data.net);
+}
+
+if (!ret)
+dev-data.net = NULL;
+break;
+case VIR_DOMAIN_DEVICE_HOSTDEV:
+if (live) {
+ret = qemuDomainAttachHostDevice(conn, driver, vm,
+ dev-data.hostdev);
+   } else {
+ret =virDomainHostdevInsert(vm-def, dev-data.hostdev);
+}

This re-attach step is where we actually have far far far worse problems
than with detach. This is blindly assuming that the guest on the target
host can use the same hostdev that it was using on the source host.

(kind of pointless to comment on, since pkrempa has changed my opinion
by forcing me to think about the failure to reattach condition, but
could be useful info for others)

For a hostdev, yes, but not for interface type='network' (which
would point to a libvirt network pool of VFs).


This
is essentially useless in the real world.

Agreed (for plain hostdev)


Even if the same vendor/model
device is available on the target host, it is very unlikely to be available
at the same bus/slot/function that it was on the source. It is quite likely
neccessary to allocate a complete different NIC, or if using SRIOV allocate
a different function. It is also not uncommon to have different vendor/models,
so a completely different NIC may be required.

In the case of a network device, a different brand/model of NIC at a
different PCI address using a different guest driver shouldn't be a
problem for the guest, as long as the MAC address is the same (for a
Linux guest anyway; not sure what a Windows guest would do with a NIC
that had the same MAC but used a different driver). This points out the
folly of trying to do migration with attached hostdevs (managed at *any*
level), for anything other than SRIOV VFs (which can have their MAC
address set before attach, unlike non-SRIOV NICs).

.

So should we focus on implementing the feature that support migration with
SRIOV
VFs at first?

I think that is simple to achieve my original target that implement NIC
passthrough
device migration. because sometimes we assign a native NIC to guest to keep
the
performance of network I/O, due to the MAC limitation of the non-SRIOV NICs,
as
laine said the cost of SRIOV NIC is cheaper than what we try.


No, I think you should /not/ attempt to implement this in libvirt at all
and instead focus on the higher level apps.



Hmm, I think there are some roles which libvirt can take in the overall
operation.

Let me clarify how things would go for PCI passthrough + migration.

 (1) the user (or a higher-level app) defines a pair of PCI devices which can
replace each other before/after migration.

 (2) the pair of devices in 2 hosts are described somewhere.

 (3) before starting migration, the migration initiator makes sure that the
other side of each device pair
 is available on the target host.

 (4) unplug the PCI devices which are described as part of a device pair.

 (5) migrate, after checking that all PCI passthrough devices are unplugged.

 (6)  on success, plug in the PCI devices which are described as part of a
device pair.
 (6') on failure, plug the unplugged devices back in.

I think
 (1) should be done by higher level apps or user (by hand).
 (2) should be a generic/vm-independent format
 (3) should be checked by 

Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-14 Thread Daniel P. Berrange
On Thu, May 14, 2015 at 10:02:39AM +0800, Chen Fan wrote:
 
 On 05/13/2015 10:30 PM, Laine Stump wrote:
 On 05/13/2015 05:57 AM, Daniel P. Berrange wrote:
 On Wed, May 13, 2015 at 11:36:30AM +0800, Chen Fan wrote:
 add migration support for ephemeral host devices, introduce
 two 'detach' and 'restore' functions to unplug/plug host devices
 during migration.
 
 Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
 ---
   src/qemu/qemu_migration.c | 171 
  --
   src/qemu/qemu_migration.h |   9 +++
   src/qemu/qemu_process.c   |  11 +++
   3 files changed, 187 insertions(+), 4 deletions(-)
 
 diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
 index 56112f9..d5a698f 100644
 --- a/src/qemu/qemu_migration.c
 +++ b/src/qemu/qemu_migration.c
 +void
 +qemuMigrationRestoreEphemeralDevices(virQEMUDriverPtr driver,
 + virConnectPtr conn,
 + virDomainObjPtr vm,
 + bool live)
 +{
 +qemuDomainObjPrivatePtr priv = vm-privateData;
 +virDomainDeviceDefPtr dev;
 +int ret = -1;
 +size_t i;
 +
 +VIR_DEBUG(Rum domain restore ephemeral devices);
 +
 +for (i = 0; i  priv-nEphemeralDevices; i++) {
 +dev = priv-ephemeralDevices[i];
 +
 +switch ((virDomainDeviceType) dev-type) {
 +case VIR_DOMAIN_DEVICE_NET:
 +if (live) {
 +ret = qemuDomainAttachNetDevice(conn, driver, vm,
 +dev-data.net);
 +} else {
 +ret = virDomainNetInsert(vm-def, dev-data.net);
 +}
 +
 +if (!ret)
 +dev-data.net = NULL;
 +break;
 +case VIR_DOMAIN_DEVICE_HOSTDEV:
 +if (live) {
 +ret = qemuDomainAttachHostDevice(conn, driver, vm,
 + dev-data.hostdev);
 +   } else {
 +ret =virDomainHostdevInsert(vm-def, dev-data.hostdev);
 +}
 This re-attach step is where we actually have far far far worse problems
 than with detach. This is blindly assuming that the guest on the target
 host can use the same hostdev that it was using on the source host.
 (kind of pointless to comment on, since pkrempa has changed my opinion
 by forcing me to think about the failure to reattach condition, but
 could be useful info for others)
 
 For a hostdev, yes, but not for interface type='network' (which
 would point to a libvirt network pool of VFs).
 
 This
 is essentially useless in the real world.
 Agreed (for plain hostdev)
 
 Even if the same vendor/model
 device is available on the target host, it is very unlikely to be available
 at the same bus/slot/function that it was on the source. It is quite likely
 neccessary to allocate a complete different NIC, or if using SRIOV allocate
 a different function. It is also not uncommon to have different 
 vendor/models,
 so a completely different NIC may be required.
 In the case of a network device, a different brand/model of NIC at a
 different PCI address using a different guest driver shouldn't be a
 problem for the guest, as long as the MAC address is the same (for a
 Linux guest anyway; not sure what a Windows guest would do with a NIC
 that had the same MAC but used a different driver). This points out the
 folly of trying to do migration with attached hostdevs (managed at *any*
 level), for anything other than SRIOV VFs (which can have their MAC
 address set before attach, unlike non-SRIOV NICs).
 
 .
 So should we focus on implementing the feature that support migration with
 SRIOV
 VFs at first?
 
 I think that is simple to achieve my original target that implement NIC
 passthrough
 device migration. because sometimes we assign a native NIC to guest to keep
 the
 performance of network I/O, due to the MAC limitation of the non-SRIOV NICs,
 as
 laine said the cost of SRIOV NIC is cheaper than what we try.

No, I think you should /not/ attempt to implement this in libvirt at all
and instead focus on the higher level apps.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-14 Thread Daniel P. Berrange
On Wed, May 13, 2015 at 10:30:32AM -0400, Laine Stump wrote:
 On 05/13/2015 05:57 AM, Daniel P. Berrange wrote:
  On Wed, May 13, 2015 at 11:36:30AM +0800, Chen Fan wrote:
  add migration support for ephemeral host devices, introduce
  two 'detach' and 'restore' functions to unplug/plug host devices
  during migration.
 
  Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
  ---
   src/qemu/qemu_migration.c | 171 
  --
   src/qemu/qemu_migration.h |   9 +++
   src/qemu/qemu_process.c   |  11 +++
   3 files changed, 187 insertions(+), 4 deletions(-)
 
  diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
  index 56112f9..d5a698f 100644
  --- a/src/qemu/qemu_migration.c
  +++ b/src/qemu/qemu_migration.c
  
  +void
  +qemuMigrationRestoreEphemeralDevices(virQEMUDriverPtr driver,
  + virConnectPtr conn,
  + virDomainObjPtr vm,
  + bool live)
  +{
  +qemuDomainObjPrivatePtr priv = vm-privateData;
  +virDomainDeviceDefPtr dev;
  +int ret = -1;
  +size_t i;
  +
  +VIR_DEBUG(Rum domain restore ephemeral devices);
  +
  +for (i = 0; i  priv-nEphemeralDevices; i++) {
  +dev = priv-ephemeralDevices[i];
  +
  +switch ((virDomainDeviceType) dev-type) {
  +case VIR_DOMAIN_DEVICE_NET:
  +if (live) {
  +ret = qemuDomainAttachNetDevice(conn, driver, vm,
  +dev-data.net);
  +} else {
  +ret = virDomainNetInsert(vm-def, dev-data.net);
  +}
  +
  +if (!ret)
  +dev-data.net = NULL;
  +break;
  +case VIR_DOMAIN_DEVICE_HOSTDEV:
  +if (live) {
  +ret = qemuDomainAttachHostDevice(conn, driver, vm,
  + dev-data.hostdev);
  +   } else {
  +ret =virDomainHostdevInsert(vm-def, dev-data.hostdev);
  +}
  
  This re-attach step is where we actually have far far far worse problems
  than with detach. This is blindly assuming that the guest on the target
  host can use the same hostdev that it was using on the source host.
 
 (kind of pointless to comment on, since pkrempa has changed my opinion
 by forcing me to think about the failure to reattach condition, but
 could be useful info for others)
 
 For a hostdev, yes, but not for interface type='network' (which
 would point to a libvirt network pool of VFs).

I should note that in OpenStack at least we don't ever use the
libvirt interface type='network' feature. This is because the
OpenStack scheduler needs to have better control over exactly
which VFs are allocated to which guest. This code runs on a
separate host, and takes into account stuff such as the NUMA
affinity of the guest, the utilization of the VFs by other
guests, and more besides. So even in the interface case
this proposal is pretty limited in usefulness.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-13 Thread Daniel P. Berrange
On Wed, May 13, 2015 at 11:36:30AM +0800, Chen Fan wrote:
 add migration support for ephemeral host devices, introduce
 two 'detach' and 'restore' functions to unplug/plug host devices
 during migration.
 
 Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
 ---
  src/qemu/qemu_migration.c | 171 
 --
  src/qemu/qemu_migration.h |   9 +++
  src/qemu/qemu_process.c   |  11 +++
  3 files changed, 187 insertions(+), 4 deletions(-)
 
 diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
 index 56112f9..d5a698f 100644
 --- a/src/qemu/qemu_migration.c
 +++ b/src/qemu/qemu_migration.c

 +void
 +qemuMigrationRestoreEphemeralDevices(virQEMUDriverPtr driver,
 + virConnectPtr conn,
 + virDomainObjPtr vm,
 + bool live)
 +{
 +qemuDomainObjPrivatePtr priv = vm-privateData;
 +virDomainDeviceDefPtr dev;
 +int ret = -1;
 +size_t i;
 +
 +VIR_DEBUG(Rum domain restore ephemeral devices);
 +
 +for (i = 0; i  priv-nEphemeralDevices; i++) {
 +dev = priv-ephemeralDevices[i];
 +
 +switch ((virDomainDeviceType) dev-type) {
 +case VIR_DOMAIN_DEVICE_NET:
 +if (live) {
 +ret = qemuDomainAttachNetDevice(conn, driver, vm,
 +dev-data.net);
 +} else {
 +ret = virDomainNetInsert(vm-def, dev-data.net);
 +}
 +
 +if (!ret)
 +dev-data.net = NULL;
 +break;
 +case VIR_DOMAIN_DEVICE_HOSTDEV:
 +if (live) {
 +ret = qemuDomainAttachHostDevice(conn, driver, vm,
 + dev-data.hostdev);
 +   } else {
 +ret =virDomainHostdevInsert(vm-def, dev-data.hostdev);
 +}

This re-attach step is where we actually have far far far worse problems
than with detach. This is blindly assuming that the guest on the target
host can use the same hostdev that it was using on the source host. This
is essentially useless in the real world. Even if the same vendor/model
device is available on the target host, it is very unlikely to be available
at the same bus/slot/function that it was on the source. It is quite likely
necessary to allocate a completely different NIC, or if using SRIOV allocate
a different function. It is also not uncommon to have different vendor/models,
so a completely different NIC may be required.

It is impossible for libvirt to do anything sensible when picking the hostdev
to use on the target host as it does not have anywhere near enough knowledge
to make a correct decision. For example, it does not know which physical
network each NIC on the target host is plugged into. Even if it knew the
networks, it does not know what the I/O utilization is likely to be, to be able
to intelligently decide between a set of possible free NICs. In any non-trivial
mgmt app, the management app itself will have this knowledge and have policies
around which hostdevice to assign to a guest given a particular set of
circumstances. It may even decide not to assign a hostdev on the target and
instead provide 2 or 3 or more emulated devices that could be used in
bandwidth aggregation mode rather than failover mode.

In OpenStack, the compute hosts don't even decide which NICs are given to
which guests. This is down to an external scheduler running on a different
host(s), and the compute host just hotplugs what has already been decided
elsewhere.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-13 Thread Daniel P. Berrange
On Wed, May 13, 2015 at 10:36:34AM +0200, Peter Krempa wrote:
 On Wed, May 13, 2015 at 11:36:30 +0800, Chen Fan wrote:
  add migration support for ephemeral host devices, introduce
  two 'detach' and 'restore' functions to unplug/plug host devices
  during migration.
  
  Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
  ---
   src/qemu/qemu_migration.c | 171 
  --
   src/qemu/qemu_migration.h |   9 +++
   src/qemu/qemu_process.c   |  11 +++
   3 files changed, 187 insertions(+), 4 deletions(-)
  
  diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
  index 56112f9..d5a698f 100644
  --- a/src/qemu/qemu_migration.c
  +++ b/src/qemu/qemu_migration.c
  @@ -3384,6 +3384,158 @@ qemuMigrationPrepareDef(virQEMUDriverPtr driver,
   return def;
   }
   
  +int
  +qemuMigrationDetachEphemeralDevices(virQEMUDriverPtr driver,
  +virDomainObjPtr vm,
  +bool live)
  +{
  +qemuDomainObjPrivatePtr priv = vm-privateData;
  +virDomainHostdevDefPtr hostdev;
  +virDomainNetDefPtr net;
  +virDomainDeviceDef dev;
  +virDomainDeviceDefPtr dev_copy = NULL;
  +virCapsPtr caps = NULL;
  +int actualType;
  +int ret = -1;
  +size_t i;
  +
  +VIR_DEBUG(Rum domain detach ephemeral devices);
  +
  +if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
  +return ret;
  +
  +for (i = 0; i  vm-def-nnets;) {
  +net = vm-def-nets[i];
  +
  +actualType = virDomainNetGetActualType(net);
  +if (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV) {
  +i++;
  +continue;
  +}
  +
  +hostdev = virDomainNetGetActualHostdev(net);
  +if (hostdev-mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
  +hostdev-source.subsys.type != 
  VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI ||
  +!hostdev-ephemeral) {
  +i++;
  +continue;
  +}
  +
  +dev.type = VIR_DOMAIN_DEVICE_NET;
  +dev.data.net = net;
  +
  +dev_copy = virDomainDeviceDefCopy(dev, vm-def,
  +  caps, driver-xmlopt);
  +if (!dev_copy)
  +goto cleanup;
  +
  +if (live) {
  +/* nnets reduced */
  +if (qemuDomainDetachNetDevice(driver, vm, dev_copy)  0)
  +goto cleanup;
 
 So this is where the fun begins. qemuDomainDetachNetDevice is not
 designed to be called this way since the detach API where it's used
 normally returns 0 in the following two cases:
 
 1) The detach was successfull, the guest removed the device
 2) The detach request was successful, but guest did not remove the
 device yet
 
 In the latter case you need to wait for a event to successfully know
 when the device was removed. Since this might very well happen the code
 will need to be changed to take that option into account. Please note
 that that step will make all the things really complicated.

Even more fun

  3) The detach request was successful, but the guest is going to
 ignore it forever

Really, this is not something we want to be deciding policy for inside
libvirt. It is no end of trouble and we really must let the mgmt app
decide how it wants this kind of problem handled.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-13 Thread Peter Krempa
On Wed, May 13, 2015 at 11:36:30 +0800, Chen Fan wrote:
 add migration support for ephemeral host devices, introduce
 two 'detach' and 'restore' functions to unplug/plug host devices
 during migration.
 
 Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
 ---
  src/qemu/qemu_migration.c | 171 
 --
  src/qemu/qemu_migration.h |   9 +++
  src/qemu/qemu_process.c   |  11 +++
  3 files changed, 187 insertions(+), 4 deletions(-)
 
 diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
 index 56112f9..d5a698f 100644
 --- a/src/qemu/qemu_migration.c
 +++ b/src/qemu/qemu_migration.c
 @@ -3384,6 +3384,158 @@ qemuMigrationPrepareDef(virQEMUDriverPtr driver,
  return def;
  }
  
 +int
 +qemuMigrationDetachEphemeralDevices(virQEMUDriverPtr driver,
 +virDomainObjPtr vm,
 +bool live)
 +{
 +qemuDomainObjPrivatePtr priv = vm-privateData;
 +virDomainHostdevDefPtr hostdev;
 +virDomainNetDefPtr net;
 +virDomainDeviceDef dev;
 +virDomainDeviceDefPtr dev_copy = NULL;
 +virCapsPtr caps = NULL;
 +int actualType;
 +int ret = -1;
 +size_t i;
 +
 +VIR_DEBUG(Rum domain detach ephemeral devices);
 +
 +if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
 +return ret;
 +
 +for (i = 0; i  vm-def-nnets;) {
 +net = vm-def-nets[i];
 +
 +actualType = virDomainNetGetActualType(net);
 +if (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV) {
 +i++;
 +continue;
 +}
 +
 +hostdev = virDomainNetGetActualHostdev(net);
 +if (hostdev-mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
 +hostdev-source.subsys.type != 
 VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI ||
 +!hostdev-ephemeral) {
 +i++;
 +continue;
 +}
 +
 +dev.type = VIR_DOMAIN_DEVICE_NET;
 +dev.data.net = net;
 +
 +dev_copy = virDomainDeviceDefCopy(dev, vm-def,
 +  caps, driver-xmlopt);
 +if (!dev_copy)
 +goto cleanup;
 +
 +if (live) {
 +/* nnets reduced */
 +if (qemuDomainDetachNetDevice(driver, vm, dev_copy)  0)
 +goto cleanup;

So this is where the fun begins. qemuDomainDetachNetDevice is not
designed to be called this way since the detach API where it's used
normally returns 0 in the following two cases:

1) The detach was successful, the guest removed the device
2) The detach request was successful, but guest did not remove the
device yet

In the latter case you need to wait for an event to know for certain
when the device was removed. Since this might very well happen, the code
will need to be changed to take that option into account. Please note
that this step will make everything really complicated.

Peter



signature.asc
Description: Digital signature
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-13 Thread Laine Stump
On 05/13/2015 04:36 AM, Peter Krempa wrote:
 On Wed, May 13, 2015 at 11:36:30 +0800, Chen Fan wrote:
 add migration support for ephemeral host devices, introduce
 two 'detach' and 'restore' functions to unplug/plug host devices
 during migration.

 Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
 ---
  src/qemu/qemu_migration.c | 171 
 --
  src/qemu/qemu_migration.h |   9 +++
  src/qemu/qemu_process.c   |  11 +++
  3 files changed, 187 insertions(+), 4 deletions(-)

 diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
 index 56112f9..d5a698f 100644
 --- a/src/qemu/qemu_migration.c
 +++ b/src/qemu/qemu_migration.c
 @@ -3384,6 +3384,158 @@ qemuMigrationPrepareDef(virQEMUDriverPtr driver,
  return def;
  }
  
 +int
 +qemuMigrationDetachEphemeralDevices(virQEMUDriverPtr driver,
 +virDomainObjPtr vm,
 +bool live)
 +{
 +qemuDomainObjPrivatePtr priv = vm-privateData;
 +virDomainHostdevDefPtr hostdev;
 +virDomainNetDefPtr net;
 +virDomainDeviceDef dev;
 +virDomainDeviceDefPtr dev_copy = NULL;
 +virCapsPtr caps = NULL;
 +int actualType;
 +int ret = -1;
 +size_t i;
 +
 +VIR_DEBUG(Rum domain detach ephemeral devices);
 +
 +if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
 +return ret;
 +
 +for (i = 0; i  vm-def-nnets;) {
 +net = vm-def-nets[i];
 +
 +actualType = virDomainNetGetActualType(net);
 +if (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV) {
 +i++;
 +continue;
 +}
 +
 +hostdev = virDomainNetGetActualHostdev(net);
 +if (hostdev-mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
 +hostdev-source.subsys.type != 
 VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI ||
 +!hostdev-ephemeral) {
 +i++;
 +continue;
 +}
 +
 +dev.type = VIR_DOMAIN_DEVICE_NET;
 +dev.data.net = net;
 +
 +dev_copy = virDomainDeviceDefCopy(dev, vm-def,
 +  caps, driver-xmlopt);
 +if (!dev_copy)
 +goto cleanup;
 +
 +if (live) {
 +/* nnets reduced */
 +if (qemuDomainDetachNetDevice(driver, vm, dev_copy)  0)
 +goto cleanup;
 
 So this is where the fun begins. qemuDomainDetachNetDevice is not
 designed to be called this way since the detach API where it's used
 normally returns 0 in the following two cases:
 
 1) The detach was successfull, the guest removed the device
 2) The detach request was successful, but guest did not remove the
 device yet
 
 In the latter case you need to wait for a event to successfully know
 when the device was removed. 

For historical reference: omission of this bit (needing to wait for the
guest to remove the device) was one of the reasons Shradha's patches
couldn't be pushed.


--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-13 Thread Laine Stump
On 05/13/2015 05:57 AM, Daniel P. Berrange wrote:
 On Wed, May 13, 2015 at 11:36:30AM +0800, Chen Fan wrote:
 add migration support for ephemeral host devices, introduce
 two 'detach' and 'restore' functions to unplug/plug host devices
 during migration.

 Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
 ---
  src/qemu/qemu_migration.c | 171 
 --
  src/qemu/qemu_migration.h |   9 +++
  src/qemu/qemu_process.c   |  11 +++
  3 files changed, 187 insertions(+), 4 deletions(-)

 diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
 index 56112f9..d5a698f 100644
 --- a/src/qemu/qemu_migration.c
 +++ b/src/qemu/qemu_migration.c
 
 +void
 +qemuMigrationRestoreEphemeralDevices(virQEMUDriverPtr driver,
 + virConnectPtr conn,
 + virDomainObjPtr vm,
 + bool live)
 +{
 +qemuDomainObjPrivatePtr priv = vm-privateData;
 +virDomainDeviceDefPtr dev;
 +int ret = -1;
 +size_t i;
 +
 +VIR_DEBUG(Rum domain restore ephemeral devices);
 +
 +for (i = 0; i  priv-nEphemeralDevices; i++) {
 +dev = priv-ephemeralDevices[i];
 +
 +switch ((virDomainDeviceType) dev-type) {
 +case VIR_DOMAIN_DEVICE_NET:
 +if (live) {
 +ret = qemuDomainAttachNetDevice(conn, driver, vm,
 +dev-data.net);
 +} else {
 +ret = virDomainNetInsert(vm-def, dev-data.net);
 +}
 +
 +if (!ret)
 +dev-data.net = NULL;
 +break;
 +case VIR_DOMAIN_DEVICE_HOSTDEV:
 +if (live) {
 +ret = qemuDomainAttachHostDevice(conn, driver, vm,
 + dev-data.hostdev);
 +   } else {
 +ret =virDomainHostdevInsert(vm-def, dev-data.hostdev);
 +}
 
 This re-attach step is where we actually have far far far worse problems
 than with detach. This is blindly assuming that the guest on the target
 host can use the same hostdev that it was using on the source host.

(kind of pointless to comment on, since pkrempa has changed my opinion
by forcing me to think about the failure to reattach condition, but
could be useful info for others)

For a hostdev, yes, but not for interface type='network' (which
would point to a libvirt network pool of VFs).

 This
 is essentially useless in the real world.

Agreed (for plain hostdev)

 Even if the same vendor/model
 device is available on the target host, it is very unlikely to be available
 at the same bus/slot/function that it was on the source. It is quite likely
 neccessary to allocate a complete different NIC, or if using SRIOV allocate
 a different function. It is also not uncommon to have different vendor/models,
 so a completely different NIC may be required.

In the case of a network device, a different brand/model of NIC at a
different PCI address using a different guest driver shouldn't be a
problem for the guest, as long as the MAC address is the same (for a
Linux guest anyway; not sure what a Windows guest would do with a NIC
that had the same MAC but used a different driver). This points out the
folly of trying to do migration with attached hostdevs (managed at *any*
level), for anything other than SRIOV VFs (which can have their MAC
address set before attach, unlike non-SRIOV NICs).

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-13 Thread Chen Fan


On 05/13/2015 10:30 PM, Laine Stump wrote:

On 05/13/2015 05:57 AM, Daniel P. Berrange wrote:

On Wed, May 13, 2015 at 11:36:30AM +0800, Chen Fan wrote:

add migration support for ephemeral host devices, introduce
two 'detach' and 'restore' functions to unplug/plug host devices
during migration.

Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
---
  src/qemu/qemu_migration.c | 171 --
  src/qemu/qemu_migration.h |   9 +++
  src/qemu/qemu_process.c   |  11 +++
  3 files changed, 187 insertions(+), 4 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 56112f9..d5a698f 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
+void
+qemuMigrationRestoreEphemeralDevices(virQEMUDriverPtr driver,
+ virConnectPtr conn,
+ virDomainObjPtr vm,
+ bool live)
+{
+qemuDomainObjPrivatePtr priv = vm-privateData;
+virDomainDeviceDefPtr dev;
+int ret = -1;
+size_t i;
+
+VIR_DEBUG(Rum domain restore ephemeral devices);
+
+for (i = 0; i  priv-nEphemeralDevices; i++) {
+dev = priv-ephemeralDevices[i];
+
+switch ((virDomainDeviceType) dev-type) {
+case VIR_DOMAIN_DEVICE_NET:
+if (live) {
+ret = qemuDomainAttachNetDevice(conn, driver, vm,
+dev-data.net);
+} else {
+ret = virDomainNetInsert(vm-def, dev-data.net);
+}
+
+if (!ret)
+dev-data.net = NULL;
+break;
+case VIR_DOMAIN_DEVICE_HOSTDEV:
+if (live) {
+ret = qemuDomainAttachHostDevice(conn, driver, vm,
+ dev-data.hostdev);
+   } else {
+ret =virDomainHostdevInsert(vm-def, dev-data.hostdev);
+}

This re-attach step is where we actually have far far far worse problems
than with detach. This is blindly assuming that the guest on the target
host can use the same hostdev that it was using on the source host.

(kind of pointless to comment on, since pkrempa has changed my opinion
by forcing me to think about the failure to reattach condition, but
could be useful info for others)

For a hostdev, yes, but not for <interface type='network'> (which
would point to a libvirt network pool of VFs).


This
is essentially useless in the real world.

Agreed (for plain hostdev)


Even if the same vendor/model
device is available on the target host, it is very unlikely to be available
at the same bus/slot/function that it was on the source. It is quite likely
necessary to allocate a completely different NIC, or if using SRIOV allocate
a different function. It is also not uncommon to have different vendor/models,
so a completely different NIC may be required.

In the case of a network device, a different brand/model of NIC at a
different PCI address using a different guest driver shouldn't be a
problem for the guest, as long as the MAC address is the same (for a
Linux guest anyway; not sure what a Windows guest would do with a NIC
that had the same MAC but used a different driver). This points out the
folly of trying to do migration with attached hostdevs (managed at *any*
level), for anything other than SRIOV VFs (which can have their MAC
address set before attach, unlike non-SRIOV NICs).

.
So should we focus first on implementing the feature that supports
migration with SRIOV VFs?

I think that is a simple way to achieve my original goal of implementing
migration for NIC passthrough devices. We sometimes assign a native NIC to
a guest to preserve network I/O performance, but given the MAC limitation
of non-SRIOV NICs, as Laine said, using SRIOV NICs costs less than what we
were trying to do.

Thanks,
Chen


--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-13 Thread Chen Fan


On 05/13/2015 04:36 PM, Peter Krempa wrote:

On Wed, May 13, 2015 at 11:36:30 +0800, Chen Fan wrote:

add migration support for ephemeral host devices, introduce
two 'detach' and 'restore' functions to unplug/plug host devices
during migration.

Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
---
  src/qemu/qemu_migration.c | 171 --
  src/qemu/qemu_migration.h |   9 +++
  src/qemu/qemu_process.c   |  11 +++
  3 files changed, 187 insertions(+), 4 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 56112f9..d5a698f 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -3384,6 +3384,158 @@ qemuMigrationPrepareDef(virQEMUDriverPtr driver,
  return def;
  }
  
+int

+qemuMigrationDetachEphemeralDevices(virQEMUDriverPtr driver,
+virDomainObjPtr vm,
+bool live)
+{
+qemuDomainObjPrivatePtr priv = vm-privateData;
+virDomainHostdevDefPtr hostdev;
+virDomainNetDefPtr net;
+virDomainDeviceDef dev;
+virDomainDeviceDefPtr dev_copy = NULL;
+virCapsPtr caps = NULL;
+int actualType;
+int ret = -1;
+size_t i;
+
+VIR_DEBUG(Rum domain detach ephemeral devices);
+
+if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
+return ret;
+
+for (i = 0; i  vm-def-nnets;) {
+net = vm-def-nets[i];
+
+actualType = virDomainNetGetActualType(net);
+if (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+i++;
+continue;
+}
+
+hostdev = virDomainNetGetActualHostdev(net);
+if (hostdev-mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
+hostdev-source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI 
||
+!hostdev-ephemeral) {
+i++;
+continue;
+}
+
+dev.type = VIR_DOMAIN_DEVICE_NET;
+dev.data.net = net;
+
+dev_copy = virDomainDeviceDefCopy(dev, vm-def,
+  caps, driver-xmlopt);
+if (!dev_copy)
+goto cleanup;
+
+if (live) {
+/* nnets reduced */
+if (qemuDomainDetachNetDevice(driver, vm, dev_copy)  0)
+goto cleanup;

So this is where the fun begins. qemuDomainDetachNetDevice is not
designed to be called this way since the detach API where it's used
normally returns 0 in the following two cases:

1) The detach was successful and the guest removed the device
2) The detach request was successful, but the guest did not remove the
device yet

In the latter case you need to wait for an event to successfully know
when the device was removed. Since this might very well happen the code
will need to be changed to take that option into account. Please note
that this step will make all the things really complicated.


Did you mean the DEVICE_DELETED event?
I saw in the code that the function qemuDomainWaitForDeviceRemoval
is used to wait for a device to be removed from the guest.

Thanks,
Chen




Peter



--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [RFC v1 4/6] migration: Migration support for ephemeral hostdevs

2015-05-12 Thread Chen Fan
add migration support for ephemeral host devices, introduce
two 'detach' and 'restore' functions to unplug/plug host devices
during migration.

Signed-off-by: Chen Fan chen.fan.f...@cn.fujitsu.com
---
 src/qemu/qemu_migration.c | 171 --
 src/qemu/qemu_migration.h |   9 +++
 src/qemu/qemu_process.c   |  11 +++
 3 files changed, 187 insertions(+), 4 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 56112f9..d5a698f 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -3384,6 +3384,158 @@ qemuMigrationPrepareDef(virQEMUDriverPtr driver,
 return def;
 }
 
+int
+qemuMigrationDetachEphemeralDevices(virQEMUDriverPtr driver,
+virDomainObjPtr vm,
+bool live)
+{
+qemuDomainObjPrivatePtr priv = vm-privateData;
+virDomainHostdevDefPtr hostdev;
+virDomainNetDefPtr net;
+virDomainDeviceDef dev;
+virDomainDeviceDefPtr dev_copy = NULL;
+virCapsPtr caps = NULL;
+int actualType;
+int ret = -1;
+size_t i;
+
+VIR_DEBUG(Rum domain detach ephemeral devices);
+
+if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
+return ret;
+
+for (i = 0; i  vm-def-nnets;) {
+net = vm-def-nets[i];
+
+actualType = virDomainNetGetActualType(net);
+if (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+i++;
+continue;
+}
+
+hostdev = virDomainNetGetActualHostdev(net);
+if (hostdev-mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
+hostdev-source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI 
||
+!hostdev-ephemeral) {
+i++;
+continue;
+}
+
+dev.type = VIR_DOMAIN_DEVICE_NET;
+dev.data.net = net;
+
+dev_copy = virDomainDeviceDefCopy(dev, vm-def,
+  caps, driver-xmlopt);
+if (!dev_copy)
+goto cleanup;
+
+if (live) {
+/* nnets reduced */
+if (qemuDomainDetachNetDevice(driver, vm, dev_copy)  0)
+goto cleanup;
+} else {
+virDomainNetDefFree(virDomainNetRemove(vm-def, i));
+}
+if (VIR_APPEND_ELEMENT(priv-ephemeralDevices,
+   priv-nEphemeralDevices,
+   dev_copy)  0) {
+goto cleanup;
+}
+dev_copy = NULL;
+}
+
+for (i = 0; i  vm-def-nhostdevs;) {
+hostdev = vm-def-hostdevs[i];
+
+if (hostdev-mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
+hostdev-source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI 
||
+!hostdev-ephemeral) {
+i++;
+continue;
+}
+
+dev.type = VIR_DOMAIN_DEVICE_HOSTDEV;
+dev.data.hostdev = hostdev;
+
+VIR_FREE(dev_copy);
+dev_copy = virDomainDeviceDefCopy(dev, vm-def,
+  caps, driver-xmlopt);
+if (!dev_copy)
+goto cleanup;
+
+if (live) {
+/* nhostdevs reduced */
+if (qemuDomainDetachHostDevice(driver, vm, dev_copy)  0)
+goto cleanup;
+} else {
+virDomainHostdevDefFree(virDomainHostdevRemove(vm-def, i));
+}
+if (VIR_APPEND_ELEMENT(priv-ephemeralDevices,
+   priv-nEphemeralDevices,
+   dev_copy)  0) {
+goto cleanup;
+}
+dev_copy = NULL;
+}
+
+ret = 0;
+ cleanup:
+virDomainDeviceDefFree(dev_copy);
+virObjectUnref(caps);
+
+return ret;
+}
+
+void
+qemuMigrationRestoreEphemeralDevices(virQEMUDriverPtr driver,
+ virConnectPtr conn,
+ virDomainObjPtr vm,
+ bool live)
+{
+qemuDomainObjPrivatePtr priv = vm-privateData;
+virDomainDeviceDefPtr dev;
+int ret = -1;
+size_t i;
+
+VIR_DEBUG(Rum domain restore ephemeral devices);
+
+for (i = 0; i  priv-nEphemeralDevices; i++) {
+dev = priv-ephemeralDevices[i];
+
+switch ((virDomainDeviceType) dev-type) {
+case VIR_DOMAIN_DEVICE_NET:
+if (live) {
+ret = qemuDomainAttachNetDevice(conn, driver, vm,
+dev-data.net);
+} else {
+ret = virDomainNetInsert(vm-def, dev-data.net);
+}
+
+if (!ret)
+dev-data.net = NULL;
+break;
+case VIR_DOMAIN_DEVICE_HOSTDEV:
+if (live) {
+ret = qemuDomainAttachHostDevice(conn, driver, vm,
+ dev-data.hostdev);
+   } else {
+ret =virDomainHostdevInsert(vm-def, dev-data.hostdev);
+}
+if (!ret)
+