On 05/30/2014 08:08 PM, Alexander Graf wrote: > > On 30.05.14 11:34, Alexey Kardashevskiy wrote: >> Currently SPAPR PHB keeps track of all allocated MSI (here and below >> MSI stands for both MSI and MSIX) interrupt because >> XICS used to be unable to reuse interrupts. This is a problem for >> dynamic MSI reconfiguration which happens when guest reloads a driver >> or performs PCI hotplug. Another problem is that the existing >> implementation can enable MSI on 32 devices maximum >> (SPAPR_MSIX_MAX_DEVS=32) and there is no good reason for that. >> >> This makes use of new XICS ability to reuse interrupts. >> >> This reorganizes MSI information storage in sPAPRPHBState. Instead of >> static array of 32 descriptors (one per a PCI function), this patch adds >> a GHashTable when @config_addr is a key and (first_irq, num) pair is >> a value. GHashTable can dynamically grow and shrink so the initial limit >> of 32 devices is gone. >> >> This changes migration stream as @msi_table was a static array while new >> @msi_devs is a dynamic hash table. This adds temporary array which is >> used for migration, it is populated in "spapr_pci"::pre_save() callback >> and expanded into the hash table in post_load() callback. Since >> the destination side does not know the number of MSI-enabled devices >> in advance and cannot pre-allocate the temporary array to receive >> migration state, this makes use of new VMSTATE_STRUCT_VARRAY_ALLOC macro >> which allocates the array automatically. >> >> This resets the MSI configuration space when interrupts are released by >> the ibm,change-msi RTAS call. >> >> This fixed traces to be more informative. >> >> This changes vmstate_spapr_pci_msi name from "...lsi" to "...msi" which >> was incorrect by accident. As the internal representation changed, >> thus bumps migration version number. >> >> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> >> --- >> Changes: >> v3: >> * used GHashTable >> * implemented temporary MSI state array for migration >> --- >> hw/ppc/spapr_pci.c | 195 >> +++++++++++++++++++++++++------------------- >> include/hw/pci-host/spapr.h | 21 +++-- >> trace-events | 5 +- >> 3 files changed, 129 insertions(+), 92 deletions(-) >> >> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c >> index a2f9677..48c9aad 100644 >> --- a/hw/ppc/spapr_pci.c >> +++ b/hw/ppc/spapr_pci.c >> @@ -220,36 +220,12 @@ static void rtas_write_pci_config(PowerPCCPU *cpu, >> sPAPREnvironment *spapr, >> } >> /* >> - * Find an entry with config_addr or returns the empty one if not found AND >> - * alloc_new is set. >> - * At the moment the msi_table entries are never released so there is >> - * no point to look till the end of the list if we need to find the free >> entry. >> - */ >> -static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr, >> - bool alloc_new) >> -{ >> - int i; >> - >> - for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) { >> - if (!phb->msi_table[i].nvec) { >> - break; >> - } >> - if (phb->msi_table[i].config_addr == config_addr) { >> - return i; >> - } >> - } >> - if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) { >> - trace_spapr_pci_msi("Allocating new MSI config", i, config_addr); >> - return i; >> - } >> - >> - return -1; >> -} >> - >> -/* >> * Set MSI/MSIX message data. >> * This is required for msi_notify()/msix_notify() which >> * will write at the addresses via spapr_msi_write(). >> + * >> + * If hwaddr == 0, all entries will have .data == first_irq i.e. >> + * table will be reset. >> */ >> static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, >> unsigned first_irq, unsigned req_num) >> @@ -263,9 +239,12 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr >> addr, bool msix, >> return; >> } >> - for (i = 0; i < req_num; ++i, ++msg.data) { >> + for (i = 0; i < req_num; ++i) { >> msix_set_message(pdev, i, msg); >> trace_spapr_pci_msi_setup(pdev->name, i, msg.address); >> + if (addr) { >> + ++msg.data; >> + } >> } >> } >> @@ -280,9 +259,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, >> sPAPREnvironment *spapr, >> unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */ >> unsigned int seq_num = rtas_ld(args, 5); >> unsigned int ret_intr_type; >> - int ndev, irq, max_irqs = 0; >> + unsigned int irq, max_irqs = 0, num = 0; >> sPAPRPHBState *phb = NULL; >> PCIDevice *pdev = NULL; >> + bool msix = false; >> + spapr_pci_msi *msi; >> + int *config_addr_key; >> switch (func) { >> case RTAS_CHANGE_MSI_FN: >> @@ -310,13 +292,18 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, >> sPAPREnvironment *spapr, >> /* Releasing MSIs */ >> if (!req_num) { >> - ndev = spapr_msicfg_find(phb, config_addr, false); >> - if (ndev < 0) { >> - trace_spapr_pci_msi("MSI has not been enabled", -1, >> config_addr); >> + msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, >> &config_addr); >> + if (!msi) { >> + trace_spapr_pci_msi("Releasing wrong config", config_addr); >> rtas_st(rets, 0, RTAS_OUT_HW_ERROR); >> return; >> } >> - trace_spapr_pci_msi("Released MSIs", ndev, config_addr); >> + >> + xics_free(spapr->icp, msi->first_irq, msi->num); >> + spapr_msi_setmsg(pdev, 0, msix, 0, num); >> + g_hash_table_remove(phb->msi, &config_addr); > > Are you sure this doesn't have to be the exact same element? That pointer > is to the stack, not to the element.
I was not sure so I tested and it deletes element even if the key for g_hash_table_remove() is on stack. I looked at glibc and it should just work as it is now while I am providing a key_equal_func() callback to the map: http://sourcecodebrowser.com/glib2.0/2.12.4/ghash_8c.html#acff7e5fffc2572456a379d3d62d3e6ed >> + >> + trace_spapr_pci_msi("Released MSIs", config_addr); >> rtas_st(rets, 0, RTAS_OUT_SUCCESS); >> rtas_st(rets, 1, 0); >> return; >> @@ -324,15 +311,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, >> sPAPREnvironment *spapr, >> /* Enabling MSI */ >> - /* Find a device number in the map to add or reuse the existing >> one */ >> - ndev = spapr_msicfg_find(phb, config_addr, true); >> - if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) { >> - error_report("No free entry for a new MSI device"); >> - rtas_st(rets, 0, RTAS_OUT_HW_ERROR); >> - return; >> - } >> - trace_spapr_pci_msi("Configuring MSI", ndev, config_addr); >> - >> /* Check if the device supports as many IRQs as requested */ >> if (ret_intr_type == RTAS_TYPE_MSI) { >> max_irqs = msi_nr_vectors_allocated(pdev); >> @@ -340,48 +318,47 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, >> sPAPREnvironment *spapr, >> max_irqs = pdev->msix_entries_nr; >> } >> if (!max_irqs) { >> - error_report("Requested interrupt type %d is not enabled for >> device#%d", >> - ret_intr_type, ndev); >> + error_report("Requested interrupt type %d is not enabled for >> device %x", >> + ret_intr_type, config_addr); >> rtas_st(rets, 0, -1); /* Hardware error */ >> return; >> } >> /* Correct the number if the guest asked for too many */ >> if (req_num > max_irqs) { >> + trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs); >> req_num = max_irqs; >> + irq = 0; /* to avoid misleading trace */ >> + goto out; >> } >> - /* Check if there is an old config and MSI number has not changed */ >> - if (phb->msi_table[ndev].nvec && (req_num != >> phb->msi_table[ndev].nvec)) { >> - /* Unexpected behaviour */ >> - error_report("Cannot reuse MSI config for device#%d", ndev); >> + /* Allocate MSIs */ >> + irq = xics_alloc_block(spapr->icp, 0, req_num, false, >> + ret_intr_type == RTAS_TYPE_MSI); >> + if (!irq) { >> + error_report("Cannot allocate MSIs for device %x", config_addr); >> rtas_st(rets, 0, RTAS_OUT_HW_ERROR); >> return; >> } >> - /* There is no cached config, allocate MSIs */ >> - if (!phb->msi_table[ndev].nvec) { >> - irq = xics_alloc_block(spapr->icp, 0, req_num, false, >> - ret_intr_type == RTAS_TYPE_MSI); >> - if (irq < 0) { >> - error_report("Cannot allocate MSIs for device#%d", ndev); >> - rtas_st(rets, 0, RTAS_OUT_HW_ERROR); >> - return; >> - } >> - phb->msi_table[ndev].irq = irq; >> - phb->msi_table[ndev].nvec = req_num; >> - phb->msi_table[ndev].config_addr = config_addr; >> - } >> - >> /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */ >> spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == >> RTAS_TYPE_MSIX, >> - phb->msi_table[ndev].irq, req_num); >> + irq, req_num); >> + /* Add MSI device to cache */ >> + msi = g_new(spapr_pci_msi, 1); >> + msi->first_irq = irq; >> + msi->num = req_num; >> + config_addr_key = g_new(int, 1); > > gint? Same thing but yeah, better to stay solid. Is that it or you will have more comments after more careful review? :) Thanks! -- Alexey