On Mon, Oct 03, 2016 at 11:24:53AM -0700, Jianjun Duan wrote:
> To manage hotplug/unplug of dynamic resources such as PCI cards,
> memory, and CPU on sPAPR guests, a firmware abstraction known as
> a Dynamic Resource Connector (DRC) is used to assign a particular
> dynamic resource to the guest, and provide an interface for the
> guest to manage configuration/removal of the resource associated
> with it.
> 
> To migrate the hotplugged resources in migration, the
> associated DRC state need be migrated. To migrate the DRC state,
> we defined the VMStateDescription struct for spapr_drc to enable
> the transmission of spapr_drc state in migration.
> 
> Not all the elements in the DRC state are migrated. Only those
> ones modifiable or needed by guest actions or device add/remove
> operation are migrated. From the perspective of device
> hotplugging, if we hotplug a device on the source, we need to
> "coldplug" it on the target. The states across two hosts for the
> same device are not the same. Ideally we want the states be same
> after migration so that the device would function as hotplugged
> on the target. For example we can unplug it. The minimum DRC
> state we need to transfer should cover all the pieces changed by
> hotplugging. Out of the elements of the DRC state, isolation_state,
> allocation_sate, and configured are involved in the DR state
> transition diagram from PAPR+ 2.7, 13.4. configured and signalled
> are needed in attaching and detaching devices. indicator_state
> provides users with hardware state information. These 6 elements
> are migrated.

Hmm.. are you saying that the DRC state of a coldplugged device (after
we've fully booted) is different from the DRC state of a hotplugged
device (after all the hotplug operations have fully completed)?

If that's correct that sounds like a general bug in the DRC state
management, not something only related to migration.

Looking at the code, though, that doesn't really seem to be what it's
doing.

> detach_cb in the DRC state is a function pointer that cannot be
> migrated. We set it right after DRC state is migrated so that
> a migrated hot-unplug event could finish its work.
> 
> The instance_id is used to identify objects in migration. We set
> instance_id of DRC using the unique index so that it is the same
> across migration.
> 
> Signed-off-by: Jianjun Duan <du...@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr_drc.c         | 69 
> ++++++++++++++++++++++++++++++++++++++++++++++
>  hw/ppc/spapr_pci.c         | 22 +++++++++++++++
>  include/hw/ppc/spapr_drc.h |  9 ++++++
>  3 files changed, 100 insertions(+)
> 
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index 6e54fd4..369ec02 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -615,6 +615,71 @@ static void spapr_dr_connector_instance_init(Object *obj)
>                          NULL, NULL, NULL, NULL);
>  }
>  
> +static bool spapr_drc_needed(void *opaque)
> +{
> +    sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
> +    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    bool rc = false;
> +    sPAPRDREntitySense value;
> +
> +    drck->entity_sense(drc, &value);
> +    /* If no dev is plugged in there is no need to migrate the DRC state */
> +    if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) {
> +        return false;
> +    }
> +    /*
> +     * If there is dev plugged in, we need to migrate the DRC state when
> +     * it is different from cold-plugged state
> +     */
> +    switch(drc->type) {
> +    /* for PCI type */
> +    case SPAPR_DR_CONNECTOR_TYPE_PCI:
> +        rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) 
> &&
> +               (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) &&
> +               drc->configured && drc->signalled && !drc->awaiting_release);
> +        break;
> +    /* for LMB type */
> +    case SPAPR_DR_CONNECTOR_TYPE_LMB:
> +        rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) &&
> +               (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) 
> &&
> +               drc->configured && drc->signalled && !drc->awaiting_release);
> +        break;

What about CPU type?z

> +    default:
> +        ;
> +    }
> +
> +    return rc;
> +}
> +
> +/* detach_cb needs be set since it is not migrated */
> +static void postmigrate_set_detach_cb(sPAPRDRConnector *drc,
> +                                      spapr_drc_detach_cb *detach_cb)
> +{
> +    drc->detach_cb = detach_cb;
> +}
> +
> +/* return the unique drc index as instance_id for qom interfaces*/
> +static int get_instance_id(DeviceState *dev)
> +{
> +    return (int)get_index(SPAPR_DR_CONNECTOR(OBJECT(dev)));
> +}
> +
> +static const VMStateDescription vmstate_spapr_drc = {
> +    .name = "spapr_drc",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = spapr_drc_needed,
> +    .fields  = (VMStateField []) {
> +        VMSTATE_UINT32(isolation_state, sPAPRDRConnector),
> +        VMSTATE_UINT32(allocation_state, sPAPRDRConnector),
> +        VMSTATE_UINT32(indicator_state, sPAPRDRConnector),
> +        VMSTATE_BOOL(configured, sPAPRDRConnector),
> +        VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
> +        VMSTATE_BOOL(signalled, sPAPRDRConnector),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static void spapr_dr_connector_class_init(ObjectClass *k, void *data)
>  {
>      DeviceClass *dk = DEVICE_CLASS(k);
> @@ -623,6 +688,8 @@ static void spapr_dr_connector_class_init(ObjectClass *k, 
> void *data)
>      dk->reset = reset;
>      dk->realize = realize;
>      dk->unrealize = unrealize;
> +    dk->vmsd = &vmstate_spapr_drc;
> +    dk->dev_get_instance_id = get_instance_id;
>      drck->set_isolation_state = set_isolation_state;
>      drck->set_indicator_state = set_indicator_state;
>      drck->set_allocation_state = set_allocation_state;
> @@ -636,6 +703,8 @@ static void spapr_dr_connector_class_init(ObjectClass *k, 
> void *data)
>      drck->detach = detach;
>      drck->release_pending = release_pending;
>      drck->set_signalled = set_signalled;
> +    drck->postmigrate_set_detach_cb = postmigrate_set_detach_cb;
> +
>      /*
>       * Reason: it crashes FIXME find and document the real reason
>       */
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 4f00865..080471c 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -1594,11 +1594,33 @@ static void spapr_pci_pre_save(void *opaque)
>      }
>  }
>  
> +/*
> + * detach_cb in the DRC state is a function pointer that cannot be
> + * migrated. We set it right after migration so that a migrated
> + * hot-unplug event could finish its work.
> + */
> +static void spapr_pci_set_detach_cb(PCIBus *bus, PCIDevice *pdev,
> +                                 void *opaque)
> +{
> +    sPAPRPHBState *sphb = opaque;
> +    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(sphb, pdev);
> +    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    drck->postmigrate_set_detach_cb(drc, spapr_phb_remove_pci_device_cb);

Why is spapr_phb_remove_pci_device_cb the right callback rather than
something else?  This doesn't seem sensible.

More to the point, you're not restoring detach_cb_opaque, which means
the detach_cb callback won't work properly anyway.

> +}
> +
>  static int spapr_pci_post_load(void *opaque, int version_id)
>  {
>      sPAPRPHBState *sphb = opaque;
>      gpointer key, value;
>      int i;
> +    PCIBus *bus = PCI_HOST_BRIDGE(sphb)->bus;
> +    unsigned int bus_no = 0;
> +
> +    /* Set detach_cb for the drc unconditionally after migration */
> +    if (bus) {
> +        pci_for_each_device(bus, pci_bus_num(bus), spapr_pci_set_detach_cb,
> +                            &bus_no);
> +    }
>  
>      for (i = 0; i < sphb->msi_devs_num; ++i) {
>          key = g_memdup(&sphb->msi_devs[i].key,
> diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
> index fa531d5..17589c8 100644
> --- a/include/hw/ppc/spapr_drc.h
> +++ b/include/hw/ppc/spapr_drc.h
> @@ -192,6 +192,15 @@ typedef struct sPAPRDRConnectorClass {
>                     void *detach_cb_opaque, Error **errp);
>      bool (*release_pending)(sPAPRDRConnector *drc);
>      void (*set_signalled)(sPAPRDRConnector *drc);
> +
> +    /*
> +     * QEMU interface for setting detach_cb after migration.
> +     * detach_cb in the DRC state is a function pointer that cannot be
> +     * migrated. We set it right after migration so that a migrated
> +     * hot-unplug event could finish its work.
> +     */
> +    void (*postmigrate_set_detach_cb)(sPAPRDRConnector *drc,
> +                                      spapr_drc_detach_cb *detach_cb);
>  } sPAPRDRConnectorClass;
>  
>  sPAPRDRConnector *spapr_dr_connector_new(Object *owner,

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to