Quoting Michael Roth (2016-11-22 11:24:23) > Quoting Greg Kurz (2016-11-22 10:35:52) > > On Thu, 17 Nov 2016 19:40:26 -0600 > > Michael Roth <mdr...@linux.vnet.ibm.com> wrote: > > > > > From: Jianjun Duan <du...@linux.vnet.ibm.com> > > > > > > To manage hotplug/unplug of dynamic resources such as PCI cards, > > > memory, and CPU on sPAPR guests, a firmware abstraction known as > > > a Dynamic Resource Connector (DRC) is used to assign a particular > > > dynamic resource to the guest, and provide an interface for the > > > guest to manage configuration/removal of the resource associated > > > with it. > > > > > > To migrate the hotplugged resources in migration, the > > > associated DRC state need be migrated. To migrate the DRC state, > > > we defined the VMStateDescription struct for spapr_drc to enable > > > the transmission of spapr_drc state in migration. > > > > > > Not all the elements in the DRC state are migrated. Only those > > > ones modifiable or needed by guest actions or device add/remove > > > operation are migrated. From the perspective of device > > > hotplugging, if we hotplug a device on the source, we need to > > > "coldplug" it on the target. The states across two hosts for the > > > same device are not the same. Ideally we want the states be same > > > after migration so that the device would function as hotplugged > > > on the target. For example we can unplug it. The minimum DRC > > > state we need to transfer should cover all the pieces changed by > > > hotplugging. Out of the elements of the DRC state, isolation_state, > > > allocation_sate, and configured are involved in the DR state > > > transition diagram from PAPR+ 2.7, 13.4. configured and signalled > > > are needed in attaching and detaching devices. indicator_state > > > provides users with hardware state information. These 6 elements > > > are migrated. > > > > > > detach_cb in the DRC state is a function pointer that cannot be > > > migrated. 
We set it right after DRC state is migrated so that > > > a migrated hot-unplug event could finish its work. > > > > > > The instance_id is used to identify objects in migration. We set > > > instance_id of DRC using the unique index so that it is the same > > > across migration. > > > > > > Signed-off-by: Jianjun Duan <du...@linux.vnet.ibm.com> > > > * add migration for awaiting_allocation state > > > Signed-off-by: Michael Roth <mdr...@linux.vnet.ibm.com> > > > --- > > > hw/ppc/spapr_drc.c | 70 > > > ++++++++++++++++++++++++++++++++++++++++++++++ > > > hw/ppc/spapr_pci.c | 22 +++++++++++++++ > > > include/hw/ppc/spapr_drc.h | 9 ++++++ > > > 3 files changed, 101 insertions(+) > > > > > > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c > > > index a0c44ee..1ec6551 100644 > > > --- a/hw/ppc/spapr_drc.c > > > +++ b/hw/ppc/spapr_drc.c > > > @@ -632,6 +632,72 @@ static void spapr_dr_connector_instance_init(Object > > > *obj) > > > NULL, NULL, NULL, NULL); > > > } > > > > > > +static bool spapr_drc_needed(void *opaque) > > > +{ > > > + sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque; > > > + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > > + bool rc = false; > > > + sPAPRDREntitySense value; > > > + > > > + drck->entity_sense(drc, &value); > > > + /* If no dev is plugged in there is no need to migrate the DRC state > > > */ > > > + if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) { > > > + return false; > > > + } > > > + /* > > > + * If there is dev plugged in, we need to migrate the DRC state when > > > + * it is different from cold-plugged state > > > + */ > > > + switch(drc->type) { > > > + /* for PCI type */ > > > + case SPAPR_DR_CONNECTOR_TYPE_PCI: > > > + rc = !((drc->isolation_state == > > > SPAPR_DR_ISOLATION_STATE_UNISOLATED) && > > > + (drc->allocation_state == > > > SPAPR_DR_ALLOCATION_STATE_USABLE) && > > > + drc->configured && drc->signalled && > > > !drc->awaiting_release); > > > + break; > > > + /* for LMB type */ > > > + case 
SPAPR_DR_CONNECTOR_TYPE_LMB: > > > + rc = !((drc->isolation_state == > > > SPAPR_DR_ISOLATION_STATE_ISOLATED) && > > > + (drc->allocation_state == > > > SPAPR_DR_ALLOCATION_STATE_UNUSABLE) && > > > + drc->configured && drc->signalled && > > > !drc->awaiting_release); > > > + break; > > > + default: > > > + ; > > > + } > > > + > > > + return rc; > > > +} > > > + > > > +/* detach_cb needs be set since it is not migrated */ > > > +static void postmigrate_set_detach_cb(sPAPRDRConnector *drc, > > > + spapr_drc_detach_cb *detach_cb) > > > +{ > > > + drc->detach_cb = detach_cb; > > > +} > > > + > > > +/* return the unique drc index as instance_id for qom interfaces*/ > > > +static int get_instance_id(DeviceState *dev) > > > +{ > > > + return (int)get_index(SPAPR_DR_CONNECTOR(OBJECT(dev))); > > > +} > > > + > > > +static const VMStateDescription vmstate_spapr_drc = { > > > + .name = "spapr_drc", > > > + .version_id = 1, > > > + .minimum_version_id = 1, > > > + .needed = spapr_drc_needed, > > > + .fields = (VMStateField []) { > > > + VMSTATE_UINT32(isolation_state, sPAPRDRConnector), > > > + VMSTATE_UINT32(allocation_state, sPAPRDRConnector), > > > + VMSTATE_UINT32(indicator_state, sPAPRDRConnector), > > > + VMSTATE_BOOL(configured, sPAPRDRConnector), > > > + VMSTATE_BOOL(awaiting_release, sPAPRDRConnector), > > > + VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector), > > > + VMSTATE_BOOL(signalled, sPAPRDRConnector), > > > + VMSTATE_END_OF_LIST() > > > + } > > > +}; > > > + > > > static void spapr_dr_connector_class_init(ObjectClass *k, void *data) > > > { > > > DeviceClass *dk = DEVICE_CLASS(k); > > > @@ -640,6 +706,8 @@ static void spapr_dr_connector_class_init(ObjectClass > > > *k, void *data) > > > dk->reset = reset; > > > dk->realize = realize; > > > dk->unrealize = unrealize; > > > + dk->vmsd = &vmstate_spapr_drc; > > > + dk->dev_get_instance_id = get_instance_id; > > > drck->set_isolation_state = set_isolation_state; > > > drck->set_indicator_state = 
set_indicator_state; > > > drck->set_allocation_state = set_allocation_state; > > > @@ -653,6 +721,8 @@ static void spapr_dr_connector_class_init(ObjectClass > > > *k, void *data) > > > drck->detach = detach; > > > drck->release_pending = release_pending; > > > drck->set_signalled = set_signalled; > > > + drck->postmigrate_set_detach_cb = postmigrate_set_detach_cb; > > > + > > > /* > > > * Reason: it crashes FIXME find and document the real reason > > > */ > > > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > > > index f9661b7..661f7d8 100644 > > > --- a/hw/ppc/spapr_pci.c > > > +++ b/hw/ppc/spapr_pci.c > > > @@ -1638,11 +1638,33 @@ static void spapr_pci_pre_save(void *opaque) > > > } > > > } > > > > > > +/* > > > + * detach_cb in the DRC state is a function pointer that cannot be > > > + * migrated. We set it right after migration so that a migrated > > > + * hot-unplug event could finish its work. > > > + */ > > > +static void spapr_pci_set_detach_cb(PCIBus *bus, PCIDevice *pdev, > > > + void *opaque) > > > +{ > > > + sPAPRPHBState *sphb = opaque; > > > + sPAPRDRConnector *drc = spapr_phb_get_pci_drc(sphb, pdev); > > > + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > > > So we assume here that all PCI devices have an associated DRC, which is > > of course wrong for coldplug devices. > > In spapr_phb_realize we do actually create a DRC for each possible PCI > device: > > /* allocate connectors for child PCI devices */ > if (sphb->dr_enabled) { > for (i = 0; i < PCI_SLOT_MAX * 8; i++) { > spapr_dr_connector_new(OBJECT(phb), > SPAPR_DR_CONNECTOR_TYPE_PCI, > (sphb->index << 16) | i); > } > } > > This is so coldplugged devices still have a mechanism for hot unplug > later. Is there another scenario that I'm missing? > > However, now that I notice the sphb->dr_enabled that does make me > concerned that this assumption will not hold for older machine types > with dr disabled. 
Will make sure to check on that (and the postcopy test issue) before re-submitting.
I'm also just noticing that the post-migrate hook to set the detach_cb is only necessary for cases where migration occurs after device_del was issued on the source (but before the device has been released by the guest). This is part of the "race window" mentioned in the summary and not actually within the scope of what this series is trying to fix. In our scenario the device_del gets issued on the target side, which is when the detach_cb's are set prior to sending unplug event to guest, so we don't need the post-migrate hook. I'll go ahead and pull it out completely since there were other discussions about better ways to approach this anyway. > > > > > > + drck->postmigrate_set_detach_cb(drc, spapr_phb_remove_pci_device_cb); > > > +} > > > + > > > static int spapr_pci_post_load(void *opaque, int version_id) > > > { > > > sPAPRPHBState *sphb = opaque; > > > gpointer key, value; > > > int i; > > > + PCIBus *bus = PCI_HOST_BRIDGE(sphb)->bus; > > > + unsigned int bus_no = 0; > > > + > > > + /* Set detach_cb for the drc unconditionally after migration */ > > > + if (bus) { > > > + pci_for_each_device(bus, pci_bus_num(bus), > > > spapr_pci_set_detach_cb, > > > + &bus_no); > > > + } > > > > > > for (i = 0; i < sphb->msi_devs_num; ++i) { > > > key = g_memdup(&sphb->msi_devs[i].key, > > > diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h > > > index fa531d5..17589c8 100644 > > > --- a/include/hw/ppc/spapr_drc.h > > > +++ b/include/hw/ppc/spapr_drc.h > > > @@ -192,6 +192,15 @@ typedef struct sPAPRDRConnectorClass { > > > void *detach_cb_opaque, Error **errp); > > > bool (*release_pending)(sPAPRDRConnector *drc); > > > void (*set_signalled)(sPAPRDRConnector *drc); > > > + > > > + /* > > > + * QEMU interface for setting detach_cb after migration. > > > + * detach_cb in the DRC state is a function pointer that cannot be > > > + * migrated. We set it right after migration so that a migrated > > > + * hot-unplug event could finish its work. 
> > > + */ > > > + void (*postmigrate_set_detach_cb)(sPAPRDRConnector *drc, > > > + spapr_drc_detach_cb *detach_cb); > > > } sPAPRDRConnectorClass; > > > > > > sPAPRDRConnector *spapr_dr_connector_new(Object *owner, > >