On Tue, 22 Nov 2016 11:33:44 -0600
Michael Roth <mdr...@linux.vnet.ibm.com> wrote:

> Quoting Michael Roth (2016-11-22 11:24:23)
> > Quoting Greg Kurz (2016-11-22 10:35:52)  
> > > On Thu, 17 Nov 2016 19:40:26 -0600
> > > Michael Roth <mdr...@linux.vnet.ibm.com> wrote:
> > >   
> > > > From: Jianjun Duan <du...@linux.vnet.ibm.com>
> > > > 
> > > > To manage hotplug/unplug of dynamic resources such as PCI cards,
> > > > memory, and CPU on sPAPR guests, a firmware abstraction known as
> > > > a Dynamic Resource Connector (DRC) is used to assign a particular
> > > > dynamic resource to the guest, and provide an interface for the
> > > > guest to manage configuration/removal of the resource associated
> > > > with it.
> > > > 
> > > > To migrate the hotplugged resources in migration, the
> > > > > associated DRC state needs to be migrated. To migrate the DRC state,
> > > > we defined the VMStateDescription struct for spapr_drc to enable
> > > > the transmission of spapr_drc state in migration.
> > > > 
> > > > Not all the elements in the DRC state are migrated. Only those
> > > > ones modifiable or needed by guest actions or device add/remove
> > > > operation are migrated. From the perspective of device
> > > > hotplugging, if we hotplug a device on the source, we need to
> > > > "coldplug" it on the target. The states across two hosts for the
> > > > > same device are not the same. Ideally we want the states to be the same
> > > > after migration so that the device would function as hotplugged
> > > > on the target. For example we can unplug it. The minimum DRC
> > > > state we need to transfer should cover all the pieces changed by
> > > > hotplugging. Out of the elements of the DRC state, isolation_state,
> > > > > allocation_state, and configured are involved in the DR state
> > > > transition diagram from PAPR+ 2.7, 13.4. configured and signalled
> > > > are needed in attaching and detaching devices. indicator_state
> > > > provides users with hardware state information. These 6 elements
> > > > are migrated.
> > > > 
> > > > detach_cb in the DRC state is a function pointer that cannot be
> > > > migrated. We set it right after DRC state is migrated so that
> > > > a migrated hot-unplug event could finish its work.
> > > > 
> > > > The instance_id is used to identify objects in migration. We set
> > > > instance_id of DRC using the unique index so that it is the same
> > > > across migration.
> > > > 
> > > > Signed-off-by: Jianjun Duan <du...@linux.vnet.ibm.com>
> > > > * add migration for awaiting_allocation state
> > > > Signed-off-by: Michael Roth <mdr...@linux.vnet.ibm.com>
> > > > ---
> > > >  hw/ppc/spapr_drc.c         | 70 
> > > > ++++++++++++++++++++++++++++++++++++++++++++++
> > > >  hw/ppc/spapr_pci.c         | 22 +++++++++++++++
> > > >  include/hw/ppc/spapr_drc.h |  9 ++++++
> > > >  3 files changed, 101 insertions(+)
> > > > 
> > > > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> > > > index a0c44ee..1ec6551 100644
> > > > --- a/hw/ppc/spapr_drc.c
> > > > +++ b/hw/ppc/spapr_drc.c
> > > > @@ -632,6 +632,72 @@ static void 
> > > > spapr_dr_connector_instance_init(Object *obj)
> > > >                          NULL, NULL, NULL, NULL);
> > > >  }
> > > >  
> > > > +static bool spapr_drc_needed(void *opaque)
> > > > +{
> > > > +    sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
> > > > +    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> > > > +    bool rc = false;
> > > > +    sPAPRDREntitySense value;
> > > > +
> > > > +    drck->entity_sense(drc, &value);
> > > > +    /* If no dev is plugged in there is no need to migrate the DRC 
> > > > state */
> > > > +    if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) {
> > > > +        return false;
> > > > +    }
> > > > +    /*
> > > > +     * If there is dev plugged in, we need to migrate the DRC state 
> > > > when
> > > > +     * it is different from cold-plugged state
> > > > +     */
> > > > +    switch(drc->type) {
> > > > +    /* for PCI type */
> > > > +    case SPAPR_DR_CONNECTOR_TYPE_PCI:
> > > > +        rc = !((drc->isolation_state == 
> > > > SPAPR_DR_ISOLATION_STATE_UNISOLATED) &&
> > > > +               (drc->allocation_state == 
> > > > SPAPR_DR_ALLOCATION_STATE_USABLE) &&
> > > > +               drc->configured && drc->signalled && 
> > > > !drc->awaiting_release);
> > > > +        break;
> > > > +    /* for LMB type */
> > > > +    case SPAPR_DR_CONNECTOR_TYPE_LMB:
> > > > +        rc = !((drc->isolation_state == 
> > > > SPAPR_DR_ISOLATION_STATE_ISOLATED) &&
> > > > +               (drc->allocation_state == 
> > > > SPAPR_DR_ALLOCATION_STATE_UNUSABLE) &&
> > > > +               drc->configured && drc->signalled && 
> > > > !drc->awaiting_release);
> > > > +        break;
> > > > +    default:
> > > > +        ;
> > > > +    }
> > > > +
> > > > +    return rc;
> > > > +}
> > > > +
> > > > +/* detach_cb needs be set since it is not migrated */
> > > > +static void postmigrate_set_detach_cb(sPAPRDRConnector *drc,
> > > > +                                      spapr_drc_detach_cb *detach_cb)
> > > > +{
> > > > +    drc->detach_cb = detach_cb;
> > > > +}
> > > > +
> > > > +/* return the unique drc index as instance_id for qom interfaces*/
> > > > +static int get_instance_id(DeviceState *dev)
> > > > +{
> > > > +    return (int)get_index(SPAPR_DR_CONNECTOR(OBJECT(dev)));
> > > > +}
> > > > +
> > > > +static const VMStateDescription vmstate_spapr_drc = {
> > > > +    .name = "spapr_drc",
> > > > +    .version_id = 1,
> > > > +    .minimum_version_id = 1,
> > > > +    .needed = spapr_drc_needed,
> > > > +    .fields  = (VMStateField []) {
> > > > +        VMSTATE_UINT32(isolation_state, sPAPRDRConnector),
> > > > +        VMSTATE_UINT32(allocation_state, sPAPRDRConnector),
> > > > +        VMSTATE_UINT32(indicator_state, sPAPRDRConnector),
> > > > +        VMSTATE_BOOL(configured, sPAPRDRConnector),
> > > > +        VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
> > > > +        VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
> > > > +        VMSTATE_BOOL(signalled, sPAPRDRConnector),
> > > > +        VMSTATE_END_OF_LIST()
> > > > +    }
> > > > +};
> > > > +
> > > >  static void spapr_dr_connector_class_init(ObjectClass *k, void *data)
> > > >  {
> > > >      DeviceClass *dk = DEVICE_CLASS(k);
> > > > @@ -640,6 +706,8 @@ static void 
> > > > spapr_dr_connector_class_init(ObjectClass *k, void *data)
> > > >      dk->reset = reset;
> > > >      dk->realize = realize;
> > > >      dk->unrealize = unrealize;
> > > > +    dk->vmsd = &vmstate_spapr_drc;
> > > > +    dk->dev_get_instance_id = get_instance_id;
> > > >      drck->set_isolation_state = set_isolation_state;
> > > >      drck->set_indicator_state = set_indicator_state;
> > > >      drck->set_allocation_state = set_allocation_state;
> > > > @@ -653,6 +721,8 @@ static void 
> > > > spapr_dr_connector_class_init(ObjectClass *k, void *data)
> > > >      drck->detach = detach;
> > > >      drck->release_pending = release_pending;
> > > >      drck->set_signalled = set_signalled;
> > > > +    drck->postmigrate_set_detach_cb = postmigrate_set_detach_cb;
> > > > +
> > > >      /*
> > > >       * Reason: it crashes FIXME find and document the real reason
> > > >       */
> > > > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > > > index f9661b7..661f7d8 100644
> > > > --- a/hw/ppc/spapr_pci.c
> > > > +++ b/hw/ppc/spapr_pci.c
> > > > @@ -1638,11 +1638,33 @@ static void spapr_pci_pre_save(void *opaque)
> > > >      }
> > > >  }
> > > >  
> > > > +/*
> > > > + * detach_cb in the DRC state is a function pointer that cannot be
> > > > + * migrated. We set it right after migration so that a migrated
> > > > + * hot-unplug event could finish its work.
> > > > + */
> > > > +static void spapr_pci_set_detach_cb(PCIBus *bus, PCIDevice *pdev,
> > > > +                                 void *opaque)
> > > > +{
> > > > +    sPAPRPHBState *sphb = opaque;
> > > > +    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(sphb, pdev);
> > > > +    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);  
> > > 
> > > So we assume here that all PCI devices have an associated DRC, which is
> > > of course wrong for coldplug devices.  
> > 
> > In spapr_phb_realize we do actually create a DRC for each possible PCI
> > device:
> > 
> >    /* allocate connectors for child PCI devices */
> >     if (sphb->dr_enabled) {
> >         for (i = 0; i < PCI_SLOT_MAX * 8; i++) {
> >             spapr_dr_connector_new(OBJECT(phb),
> >                                    SPAPR_DR_CONNECTOR_TYPE_PCI,
> >                                    (sphb->index << 16) | i);
> >         }
> >     }
> > 
> > This is so coldplugged devices still have a mechanism for hot unplug
> > later. Is there another scenario that I'm missing?
> > 
> > However, now that I notice the sphb->dr_enabled that does make me
> > concerned that this assumption will not hold for older machine types
> > with dr disabled. Will make sure to check on that (and the postcopy
> > test issue) before re-submitting.  
> 
> I'm also just noticing that the post-migrate hook to set the detach_cb
> is only necessary for cases where migration occurs after device_del
> was issued on the source (but before the device has been released
> by the guest).  This is part of the "race window" mentioned in the
> summary and not actually within the scope of what this series is trying
> to fix. In our scenario the device_del gets issued on the target side,
> which is when the detach_cb's are set prior to sending unplug event to
> guest, so we don't need the post-migrate hook.
> 
> I'll go ahead and pull it out completely since there were other
> discussions about better ways to approach this anyway.
> 

Ok, I've now read the whole thread and I agree the specific case where
migration happens while unplug is not yet finalized should be handled
separately.

Cheers.

--
Greg

> >   
> > >   
> > > > +    drck->postmigrate_set_detach_cb(drc, 
> > > > spapr_phb_remove_pci_device_cb);
> > > > +}
> > > > +
> > > >  static int spapr_pci_post_load(void *opaque, int version_id)
> > > >  {
> > > >      sPAPRPHBState *sphb = opaque;
> > > >      gpointer key, value;
> > > >      int i;
> > > > +    PCIBus *bus = PCI_HOST_BRIDGE(sphb)->bus;
> > > > +    unsigned int bus_no = 0;
> > > > +
> > > > +    /* Set detach_cb for the drc unconditionally after migration */
> > > > +    if (bus) {
> > > > +        pci_for_each_device(bus, pci_bus_num(bus), 
> > > > spapr_pci_set_detach_cb,
> > > > +                            &bus_no);
> > > > +    }
> > > >  
> > > >      for (i = 0; i < sphb->msi_devs_num; ++i) {
> > > >          key = g_memdup(&sphb->msi_devs[i].key,
> > > > diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
> > > > index fa531d5..17589c8 100644
> > > > --- a/include/hw/ppc/spapr_drc.h
> > > > +++ b/include/hw/ppc/spapr_drc.h
> > > > @@ -192,6 +192,15 @@ typedef struct sPAPRDRConnectorClass {
> > > >                     void *detach_cb_opaque, Error **errp);
> > > >      bool (*release_pending)(sPAPRDRConnector *drc);
> > > >      void (*set_signalled)(sPAPRDRConnector *drc);
> > > > +
> > > > +    /*
> > > > +     * QEMU interface for setting detach_cb after migration.
> > > > +     * detach_cb in the DRC state is a function pointer that cannot be
> > > > +     * migrated. We set it right after migration so that a migrated
> > > > +     * hot-unplug event could finish its work.
> > > > +     */
> > > > +    void (*postmigrate_set_detach_cb)(sPAPRDRConnector *drc,
> > > > +                                      spapr_drc_detach_cb *detach_cb);
> > > >  } sPAPRDRConnectorClass;
> > > >  
> > > >  sPAPRDRConnector *spapr_dr_connector_new(Object *owner,  
> > >   
> 
> 


Reply via email to