On 05/30/2014 08:08 PM, Alexander Graf wrote:
> 
> On 30.05.14 11:34, Alexey Kardashevskiy wrote:
>> Currently SPAPR PHB keeps track of all allocated MSI (here and below
>> MSI stands for both MSI and MSIX) interrupt because
>> XICS used to be unable to reuse interrupts. This is a problem for
>> dynamic MSI reconfiguration which happens when guest reloads a driver
>> or performs PCI hotplug. Another problem is that the existing
>> implementation can enable MSI on 32 devices maximum
>> (SPAPR_MSIX_MAX_DEVS=32) and there is no good reason for that.
>>
>> This makes use of new XICS ability to reuse interrupts.
>>
>> This reorganizes MSI information storage in sPAPRPHBState. Instead of
>> static array of 32 descriptors (one per a PCI function), this patch adds
>> a GHashTable when @config_addr is a key and (first_irq, num) pair is
>> a value. GHashTable can dynamically grow and shrink so the initial limit
>> of 32 devices is gone.
>>
>> This changes migration stream as @msi_table was a static array while new
>> @msi_devs is a dynamic hash table. This adds temporary array which is
>> used for migration, it is populated in "spapr_pci"::pre_save() callback
>> and expanded into the hash table in post_load() callback. Since
>> the destination side does not know the number of MSI-enabled devices
>> in advance and cannot pre-allocate the temporary array to receive
>> migration state, this makes use of new VMSTATE_STRUCT_VARRAY_ALLOC macro
>> which allocates the array automatically.
>>
>> This resets the MSI configuration space when interrupts are released by
>> the ibm,change-msi RTAS call.
>>
>> This fixed traces to be more informative.
>>
>> This changes vmstate_spapr_pci_msi name from "...lsi" to "...msi" which
>> was incorrect by accident. As the internal representation changed,
>> thus bumps migration version number.
>>
>> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
>> ---
>> Changes:
>> v3:
>> * used GHashTable
>> * implemented temporary MSI state array for migration
>> ---
>>   hw/ppc/spapr_pci.c          | 195
>> +++++++++++++++++++++++++-------------------
>>   include/hw/pci-host/spapr.h |  21 +++--
>>   trace-events                |   5 +-
>>   3 files changed, 129 insertions(+), 92 deletions(-)
>>
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index a2f9677..48c9aad 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -220,36 +220,12 @@ static void rtas_write_pci_config(PowerPCCPU *cpu,
>> sPAPREnvironment *spapr,
>>   }
>>     /*
>> - * Find an entry with config_addr or returns the empty one if not found AND
>> - * alloc_new is set.
>> - * At the moment the msi_table entries are never released so there is
>> - * no point to look till the end of the list if we need to find the free
>> entry.
>> - */
>> -static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
>> -                             bool alloc_new)
>> -{
>> -    int i;
>> -
>> -    for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
>> -        if (!phb->msi_table[i].nvec) {
>> -            break;
>> -        }
>> -        if (phb->msi_table[i].config_addr == config_addr) {
>> -            return i;
>> -        }
>> -    }
>> -    if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
>> -        trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
>> -        return i;
>> -    }
>> -
>> -    return -1;
>> -}
>> -
>> -/*
>>    * Set MSI/MSIX message data.
>>    * This is required for msi_notify()/msix_notify() which
>>    * will write at the addresses via spapr_msi_write().
>> + *
>> + * If hwaddr == 0, all entries will have .data == first_irq i.e.
>> + * table will be reset.
>>    */
>>   static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
>>                                unsigned first_irq, unsigned req_num)
>> @@ -263,9 +239,12 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr
>> addr, bool msix,
>>           return;
>>       }
>>   -    for (i = 0; i < req_num; ++i, ++msg.data) {
>> +    for (i = 0; i < req_num; ++i) {
>>           msix_set_message(pdev, i, msg);
>>           trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
>> +        if (addr) {
>> +            ++msg.data;
>> +        }
>>       }
>>   }
>>   @@ -280,9 +259,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu,
>> sPAPREnvironment *spapr,
>>       unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
>>       unsigned int seq_num = rtas_ld(args, 5);
>>       unsigned int ret_intr_type;
>> -    int ndev, irq, max_irqs = 0;
>> +    unsigned int irq, max_irqs = 0, num = 0;
>>       sPAPRPHBState *phb = NULL;
>>       PCIDevice *pdev = NULL;
>> +    bool msix = false;
>> +    spapr_pci_msi *msi;
>> +    int *config_addr_key;
>>         switch (func) {
>>       case RTAS_CHANGE_MSI_FN:
>> @@ -310,13 +292,18 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu,
>> sPAPREnvironment *spapr,
>>         /* Releasing MSIs */
>>       if (!req_num) {
>> -        ndev = spapr_msicfg_find(phb, config_addr, false);
>> -        if (ndev < 0) {
>> -            trace_spapr_pci_msi("MSI has not been enabled", -1,
>> config_addr);
>> +        msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi,
>> &config_addr);
>> +        if (!msi) {
>> +            trace_spapr_pci_msi("Releasing wrong config", config_addr);
>>               rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
>>               return;
>>           }
>> -        trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
>> +
>> +        xics_free(spapr->icp, msi->first_irq, msi->num);
>> +        spapr_msi_setmsg(pdev, 0, msix, 0, num);
>> +        g_hash_table_remove(phb->msi, &config_addr);
> 
> Are you sure this doesn't have to be the exact same element? That pointer
> is to the stack, not to the element.


I was not sure so I tested and it deletes element even if the key for
g_hash_table_remove() is on stack. I looked at glibc and it should just
work as it is now while I am providing a key_equal_func() callback to the map:
http://sourcecodebrowser.com/glib2.0/2.12.4/ghash_8c.html#acff7e5fffc2572456a379d3d62d3e6ed



>> +
>> +        trace_spapr_pci_msi("Released MSIs", config_addr);
>>           rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>           rtas_st(rets, 1, 0);
>>           return;
>> @@ -324,15 +311,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu,
>> sPAPREnvironment *spapr,
>>         /* Enabling MSI */
>>   -    /* Find a device number in the map to add or reuse the existing
>> one */
>> -    ndev = spapr_msicfg_find(phb, config_addr, true);
>> -    if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
>> -        error_report("No free entry for a new MSI device");
>> -        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
>> -        return;
>> -    }
>> -    trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
>> -
>>       /* Check if the device supports as many IRQs as requested */
>>       if (ret_intr_type == RTAS_TYPE_MSI) {
>>           max_irqs = msi_nr_vectors_allocated(pdev);
>> @@ -340,48 +318,47 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu,
>> sPAPREnvironment *spapr,
>>           max_irqs = pdev->msix_entries_nr;
>>       }
>>       if (!max_irqs) {
>> -        error_report("Requested interrupt type %d is not enabled for
>> device#%d",
>> -                     ret_intr_type, ndev);
>> +        error_report("Requested interrupt type %d is not enabled for
>> device %x",
>> +                     ret_intr_type, config_addr);
>>           rtas_st(rets, 0, -1); /* Hardware error */
>>           return;
>>       }
>>       /* Correct the number if the guest asked for too many */
>>       if (req_num > max_irqs) {
>> +        trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
>>           req_num = max_irqs;
>> +        irq = 0; /* to avoid misleading trace */
>> +        goto out;
>>       }
>>   -    /* Check if there is an old config and MSI number has not changed */
>> -    if (phb->msi_table[ndev].nvec && (req_num !=
>> phb->msi_table[ndev].nvec)) {
>> -        /* Unexpected behaviour */
>> -        error_report("Cannot reuse MSI config for device#%d", ndev);
>> +    /* Allocate MSIs */
>> +    irq = xics_alloc_block(spapr->icp, 0, req_num, false,
>> +                           ret_intr_type == RTAS_TYPE_MSI);
>> +    if (!irq) {
>> +        error_report("Cannot allocate MSIs for device %x", config_addr);
>>           rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
>>           return;
>>       }
>>   -    /* There is no cached config, allocate MSIs */
>> -    if (!phb->msi_table[ndev].nvec) {
>> -        irq = xics_alloc_block(spapr->icp, 0, req_num, false,
>> -                               ret_intr_type == RTAS_TYPE_MSI);
>> -        if (irq < 0) {
>> -            error_report("Cannot allocate MSIs for device#%d", ndev);
>> -            rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
>> -            return;
>> -        }
>> -        phb->msi_table[ndev].irq = irq;
>> -        phb->msi_table[ndev].nvec = req_num;
>> -        phb->msi_table[ndev].config_addr = config_addr;
>> -    }
>> -
>>       /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
>>       spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type ==
>> RTAS_TYPE_MSIX,
>> -                     phb->msi_table[ndev].irq, req_num);
>> +                     irq, req_num);
>>   +    /* Add MSI device to cache */
>> +    msi = g_new(spapr_pci_msi, 1);
>> +    msi->first_irq = irq;
>> +    msi->num = req_num;
>> +    config_addr_key = g_new(int, 1);
> 
> gint?

Same thing but yeah, better to stay solid.

Is that it or you will have more comments after more careful review? :) Thanks!



-- 
Alexey

Reply via email to