On Tue, 22 Nov 2016 11:33:44 -0600 Michael Roth <mdr...@linux.vnet.ibm.com> wrote:
> Quoting Michael Roth (2016-11-22 11:24:23) > > Quoting Greg Kurz (2016-11-22 10:35:52) > > > On Thu, 17 Nov 2016 19:40:26 -0600 > > > Michael Roth <mdr...@linux.vnet.ibm.com> wrote: > > > > > > > From: Jianjun Duan <du...@linux.vnet.ibm.com> > > > > > > > > To manage hotplug/unplug of dynamic resources such as PCI cards, > > > > memory, and CPU on sPAPR guests, a firmware abstraction known as > > > > a Dynamic Resource Connector (DRC) is used to assign a particular > > > > dynamic resource to the guest, and provide an interface for the > > > > guest to manage configuration/removal of the resource associated > > > > with it. > > > > > > > > To migrate the hotplugged resources in migration, the > > > > associated DRC state needs to be migrated. To migrate the DRC state, > > > > we defined the VMStateDescription struct for spapr_drc to enable > > > > the transmission of spapr_drc state in migration. > > > > > > > > Not all the elements in the DRC state are migrated. Only those > > > > ones modifiable or needed by guest actions or device add/remove > > > > operation are migrated. From the perspective of device > > > > hotplugging, if we hotplug a device on the source, we need to > > > > "coldplug" it on the target. The states across two hosts for the > > > > same device are not the same. Ideally we want the states to be the same > > > > after migration so that the device would function as hotplugged > > > > on the target. For example we can unplug it. The minimum DRC > > > > state we need to transfer should cover all the pieces changed by > > > > hotplugging. Out of the elements of the DRC state, isolation_state, > > > > allocation_state, and configured are involved in the DR state > > > > transition diagram from PAPR+ 2.7, 13.4. configured and signalled > > > > are needed in attaching and detaching devices. indicator_state > > > > provides users with hardware state information. These 6 elements > > > > are migrated. 
> > > > > > > > detach_cb in the DRC state is a function pointer that cannot be > > > > migrated. We set it right after DRC state is migrated so that > > > > a migrated hot-unplug event could finish its work. > > > > > > > > The instance_id is used to identify objects in migration. We set > > > > instance_id of DRC using the unique index so that it is the same > > > > across migration. > > > > > > > > Signed-off-by: Jianjun Duan <du...@linux.vnet.ibm.com> > > > > * add migration for awaiting_allocation state > > > > Signed-off-by: Michael Roth <mdr...@linux.vnet.ibm.com> > > > > --- > > > > hw/ppc/spapr_drc.c | 70 > > > > ++++++++++++++++++++++++++++++++++++++++++++++ > > > > hw/ppc/spapr_pci.c | 22 +++++++++++++++ > > > > include/hw/ppc/spapr_drc.h | 9 ++++++ > > > > 3 files changed, 101 insertions(+) > > > > > > > > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c > > > > index a0c44ee..1ec6551 100644 > > > > --- a/hw/ppc/spapr_drc.c > > > > +++ b/hw/ppc/spapr_drc.c > > > > @@ -632,6 +632,72 @@ static void > > > > spapr_dr_connector_instance_init(Object *obj) > > > > NULL, NULL, NULL, NULL); > > > > } > > > > > > > > +static bool spapr_drc_needed(void *opaque) > > > > +{ > > > > + sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque; > > > > + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > > > + bool rc = false; > > > > + sPAPRDREntitySense value; > > > > + > > > > + drck->entity_sense(drc, &value); > > > > + /* If no dev is plugged in there is no need to migrate the DRC > > > > state */ > > > > + if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) { > > > > + return false; > > > > + } > > > > + /* > > > > + * If there is dev plugged in, we need to migrate the DRC state > > > > when > > > > + * it is different from cold-plugged state > > > > + */ > > > > + switch(drc->type) { > > > > + /* for PCI type */ > > > > + case SPAPR_DR_CONNECTOR_TYPE_PCI: > > > > + rc = !((drc->isolation_state == > > > > SPAPR_DR_ISOLATION_STATE_UNISOLATED) && > > > > + 
(drc->allocation_state == > > > > SPAPR_DR_ALLOCATION_STATE_USABLE) && > > > > + drc->configured && drc->signalled && > > > > !drc->awaiting_release); > > > > + break; > > > > + /* for LMB type */ > > > > + case SPAPR_DR_CONNECTOR_TYPE_LMB: > > > > + rc = !((drc->isolation_state == > > > > SPAPR_DR_ISOLATION_STATE_ISOLATED) && > > > > + (drc->allocation_state == > > > > SPAPR_DR_ALLOCATION_STATE_UNUSABLE) && > > > > + drc->configured && drc->signalled && > > > > !drc->awaiting_release); > > > > + break; > > > > + default: > > > > + ; > > > > + } > > > > + > > > > + return rc; > > > > +} > > > > + > > > > +/* detach_cb needs be set since it is not migrated */ > > > > +static void postmigrate_set_detach_cb(sPAPRDRConnector *drc, > > > > + spapr_drc_detach_cb *detach_cb) > > > > +{ > > > > + drc->detach_cb = detach_cb; > > > > +} > > > > + > > > > +/* return the unique drc index as instance_id for qom interfaces*/ > > > > +static int get_instance_id(DeviceState *dev) > > > > +{ > > > > + return (int)get_index(SPAPR_DR_CONNECTOR(OBJECT(dev))); > > > > +} > > > > + > > > > +static const VMStateDescription vmstate_spapr_drc = { > > > > + .name = "spapr_drc", > > > > + .version_id = 1, > > > > + .minimum_version_id = 1, > > > > + .needed = spapr_drc_needed, > > > > + .fields = (VMStateField []) { > > > > + VMSTATE_UINT32(isolation_state, sPAPRDRConnector), > > > > + VMSTATE_UINT32(allocation_state, sPAPRDRConnector), > > > > + VMSTATE_UINT32(indicator_state, sPAPRDRConnector), > > > > + VMSTATE_BOOL(configured, sPAPRDRConnector), > > > > + VMSTATE_BOOL(awaiting_release, sPAPRDRConnector), > > > > + VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector), > > > > + VMSTATE_BOOL(signalled, sPAPRDRConnector), > > > > + VMSTATE_END_OF_LIST() > > > > + } > > > > +}; > > > > + > > > > static void spapr_dr_connector_class_init(ObjectClass *k, void *data) > > > > { > > > > DeviceClass *dk = DEVICE_CLASS(k); > > > > @@ -640,6 +706,8 @@ static void > > > > 
spapr_dr_connector_class_init(ObjectClass *k, void *data) > > > > dk->reset = reset; > > > > dk->realize = realize; > > > > dk->unrealize = unrealize; > > > > + dk->vmsd = &vmstate_spapr_drc; > > > > + dk->dev_get_instance_id = get_instance_id; > > > > drck->set_isolation_state = set_isolation_state; > > > > drck->set_indicator_state = set_indicator_state; > > > > drck->set_allocation_state = set_allocation_state; > > > > @@ -653,6 +721,8 @@ static void > > > > spapr_dr_connector_class_init(ObjectClass *k, void *data) > > > > drck->detach = detach; > > > > drck->release_pending = release_pending; > > > > drck->set_signalled = set_signalled; > > > > + drck->postmigrate_set_detach_cb = postmigrate_set_detach_cb; > > > > + > > > > /* > > > > * Reason: it crashes FIXME find and document the real reason > > > > */ > > > > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > > > > index f9661b7..661f7d8 100644 > > > > --- a/hw/ppc/spapr_pci.c > > > > +++ b/hw/ppc/spapr_pci.c > > > > @@ -1638,11 +1638,33 @@ static void spapr_pci_pre_save(void *opaque) > > > > } > > > > } > > > > > > > > +/* > > > > + * detach_cb in the DRC state is a function pointer that cannot be > > > > + * migrated. We set it right after migration so that a migrated > > > > + * hot-unplug event could finish its work. > > > > + */ > > > > +static void spapr_pci_set_detach_cb(PCIBus *bus, PCIDevice *pdev, > > > > + void *opaque) > > > > +{ > > > > + sPAPRPHBState *sphb = opaque; > > > > + sPAPRDRConnector *drc = spapr_phb_get_pci_drc(sphb, pdev); > > > > + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > > > > > So we assume here that all PCI devices have an associated DRC, which is > > > of course wrong for coldplug devices. 
> > > > In spapr_phb_realize we do actually create a DRC for each possible PCI > > device: > > > > /* allocate connectors for child PCI devices */ > > if (sphb->dr_enabled) { > > for (i = 0; i < PCI_SLOT_MAX * 8; i++) { > > spapr_dr_connector_new(OBJECT(phb), > > SPAPR_DR_CONNECTOR_TYPE_PCI, > > (sphb->index << 16) | i); > > } > > } > > > > This is so coldplugged devices still have a mechanism for hot unplug > > later. Is there another scenario that I'm missing? > > > > However, now that I notice the sphb->dr_enabled that does make me > > concerned that this assumption will not hold for older machine types > > with dr disabled. Will make sure to check on that (and the postcopy > > test issue) before re-submitting. > > I'm also just noticing that the post-migrate hook to set the detach_cb > is only necessary for cases where migration occurs after device_del > was issued on the source (but before the device has been released > by the guest). This is part of the "race window" mentioned in the > summary and not actually within the scope of what this series is trying > to fix. In our scenario the device_del gets issued on the target side, > which is when the detach_cb's are set prior to sending unplug event to > guest, so we don't need the post-migrate hook. > > I'll go ahead and pull it out completely since there were other > discussions about better ways to approach this anyway. > Ok, I've now read the whole thread and I agree the specific case where migration happens while unplug is not yet finalized should be handled separately. Cheers. 
-- Greg > > > > > > > > > + drck->postmigrate_set_detach_cb(drc, > > > > spapr_phb_remove_pci_device_cb); > > > > +} > > > > + > > > > static int spapr_pci_post_load(void *opaque, int version_id) > > > > { > > > > sPAPRPHBState *sphb = opaque; > > > > gpointer key, value; > > > > int i; > > > > + PCIBus *bus = PCI_HOST_BRIDGE(sphb)->bus; > > > > + unsigned int bus_no = 0; > > > > + > > > > + /* Set detach_cb for the drc unconditionally after migration */ > > > > + if (bus) { > > > > + pci_for_each_device(bus, pci_bus_num(bus), > > > > spapr_pci_set_detach_cb, > > > > + &bus_no); > > > > + } > > > > > > > > for (i = 0; i < sphb->msi_devs_num; ++i) { > > > > key = g_memdup(&sphb->msi_devs[i].key, > > > > diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h > > > > index fa531d5..17589c8 100644 > > > > --- a/include/hw/ppc/spapr_drc.h > > > > +++ b/include/hw/ppc/spapr_drc.h > > > > @@ -192,6 +192,15 @@ typedef struct sPAPRDRConnectorClass { > > > > void *detach_cb_opaque, Error **errp); > > > > bool (*release_pending)(sPAPRDRConnector *drc); > > > > void (*set_signalled)(sPAPRDRConnector *drc); > > > > + > > > > + /* > > > > + * QEMU interface for setting detach_cb after migration. > > > > + * detach_cb in the DRC state is a function pointer that cannot be > > > > + * migrated. We set it right after migration so that a migrated > > > > + * hot-unplug event could finish its work. > > > > + */ > > > > + void (*postmigrate_set_detach_cb)(sPAPRDRConnector *drc, > > > > + spapr_drc_detach_cb *detach_cb); > > > > } sPAPRDRConnectorClass; > > > > > > > > sPAPRDRConnector *spapr_dr_connector_new(Object *owner, > > > > >