Ping, anyone?
On 07/23/2013 12:54 PM, Alexey Kardashevskiy wrote: > Michael, could you please review the patch as it is about PCI? Thanks! > > > On 07/12/2013 05:38 PM, Alexey Kardashevskiy wrote: >> On the sPAPR platform a guest allocates MSI/MSIX vectors via RTAS >> hypercalls which return global IRQ numbers to a guest so it only >> operates with those and never touches MSIMessage. >> >> Therefore MSIMessage handling is completely hidden in QEMU. >> >> Previously every sPAPR PCI host bridge implemented its own MSI window >> to catch msi_notify()/msix_notify() calls from QEMU devices (virtio-pci >> or vfio) and route them to the guest via qemu_pulse_irq(). >> MSIMessage used to be encoded as: >> .addr - address within the PHB MSI window; >> .data - the device index on PHB plus vector number. >> The MSI MR write function translated this MSIMessage to a global IRQ >> number and called qemu_pulse_irq(). >> >> However the total number of IRQs is not really big (at the moment it is >> 1024 IRQs starting from 4096) and even 16bit data field of MSIMessage >> seems to be enough to store an IRQ number there. >> >> This simplifies MSI handling in sPAPR PHB. Specifically, this does: >> 1. remove a MSI window from a PHB; >> 2. add a single memory region for all MSIs to sPAPREnvironment >> and spapr_pci_msi_init() to initialize it; >> 3. encode MSIMessage as: >> * .addr - a fixed address of SPAPR_PCI_MSI_WINDOW==0x40000000000ULL; >> * .data as an IRQ number. >> 4. change IRQ allocator to align first IRQ number in a block for MSI. >> MSI uses lower bits to specify the vector number so the first IRQ has to >> be aligned. MSIX does not need any special allocator though. >> >> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> >> --- >> hw/ppc/spapr.c | 29 ++++++++++++++++++--- >> hw/ppc/spapr_pci.c | 61 >> ++++++++++++++++++++------------------------- >> include/hw/pci-host/spapr.h | 8 +++--- >> include/hw/ppc/spapr.h | 4 ++- >> 4 files changed, 60 insertions(+), 42 deletions(-) >> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >> index 671bbd9..6b21191 100644 >> --- a/hw/ppc/spapr.c >> +++ b/hw/ppc/spapr.c >> @@ -88,6 +88,9 @@ int spapr_allocate_irq(int hint, bool lsi) >> >> if (hint) { >> irq = hint; >> + if (hint >= spapr->next_irq) { >> + spapr->next_irq = hint + 1; >> + } >> /* FIXME: we should probably check for collisions somehow */ >> } else { >> irq = spapr->next_irq++; >> @@ -103,22 +106,39 @@ int spapr_allocate_irq(int hint, bool lsi) >> return irq; >> } >> >> -/* Allocate block of consequtive IRQs, returns a number of the first */ >> -int spapr_allocate_irq_block(int num, bool lsi) >> +/* >> + * Allocate block of consequtive IRQs, returns a number of the first. >> + * If msi==true, aligns the first IRQ number to num. >> + */ >> +int spapr_allocate_irq_block(int num, bool lsi, bool msi) >> { >> int first = -1; >> - int i; >> + int i, hint = 0; >> + >> + /* >> + * MSIMesage::data is used for storing VIRQ so >> + * it has to be aligned to num to support multiple >> + * MSI vectors. MSI-X is not affected by this. >> + * The hint is used for the first IRQ, the rest should >> + * be allocated continously. >> + */ >> + if (msi) { >> + assert((num == 1) || (num == 2) || (num == 4) || >> + (num == 8) || (num == 16) || (num == 32)); >> + hint = (spapr->next_irq + num - 1) & ~(num - 1); >> + } >> >> for (i = 0; i < num; ++i) { >> int irq; >> >> - irq = spapr_allocate_irq(0, lsi); >> + irq = spapr_allocate_irq(hint, lsi); >> if (!irq) { >> return -1; >> } >> >> if (0 == i) { >> first = irq; >> + hint = 0; >> } >> >> /* If the above doesn't create a consecutive block then that's >> @@ -1127,6 +1147,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) >> spapr_create_nvram(spapr); >> >> /* Set up PCI */ >> + spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW); >> spapr_pci_rtas_init(); >> >> phb = spapr_create_phb(spapr, 0); >> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c >> index dfe4d04..c8ea777 100644 >> --- a/hw/ppc/spapr_pci.c >> +++ b/hw/ppc/spapr_pci.c >> @@ -258,11 +258,11 @@ static int spapr_msicfg_find(sPAPRPHBState *phb, >> uint32_t config_addr, >> * This is required for msi_notify()/msix_notify() which >> * will write at the addresses via spapr_msi_write(). >> */ >> -static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, >> - bool msix, unsigned req_num) >> +static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, >> + unsigned first_irq, unsigned req_num) >> { >> unsigned i; >> - MSIMessage msg = { .address = addr, .data = 0 }; >> + MSIMessage msg = { .address = addr, .data = first_irq }; >> >> if (!msix) { >> msi_set_message(pdev, msg); >> @@ -270,8 +270,7 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr >> addr, >> return; >> } >> >> - for (i = 0; i < req_num; ++i) { >> - msg.address = addr | (i << 2); >> + for (i = 0; i < req_num; ++i, ++msg.data) { >> msix_set_message(pdev, i, msg); >> trace_spapr_pci_msi_setup(pdev->name, i, msg.address); >> } >> @@ -351,7 +350,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, >> sPAPREnvironment *spapr, >> >> /* There is no cached config, allocate MSIs */ >> if (!phb->msi_table[ndev].nvec) { >> - irq = spapr_allocate_irq_block(req_num, false); >> + irq = spapr_allocate_irq_block(req_num, false, >> + ret_intr_type == RTAS_TYPE_MSI); >> if (irq < 0) { >> fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev); >> rtas_st(rets, 0, -1); /* Hardware error */ >> @@ -363,8 +363,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, >> sPAPREnvironment *spapr, >> } >> >> /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */ >> - spapr_msi_setmsg(pdev, phb->msi_win_addr | (ndev << 16), >> - ret_intr_type == RTAS_TYPE_MSIX, req_num); >> + spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == >> RTAS_TYPE_MSIX, >> + phb->msi_table[ndev].irq, req_num); >> >> rtas_st(rets, 0, 0); >> rtas_st(rets, 1, req_num); >> @@ -487,10 +487,7 @@ static const MemoryRegionOps spapr_io_ops = { >> static void spapr_msi_write(void *opaque, hwaddr addr, >> uint64_t data, unsigned size) >> { >> - sPAPRPHBState *phb = opaque; >> - int ndev = addr >> 16; >> - int vec = ((addr & 0xFFFF) >> 2) | data; >> - uint32_t irq = phb->msi_table[ndev].irq + vec; >> + uint32_t irq = data; >> >> trace_spapr_pci_msi_write(addr, data, irq); >> >> @@ -504,6 +501,23 @@ static const MemoryRegionOps spapr_msi_ops = { >> .endianness = DEVICE_LITTLE_ENDIAN >> }; >> >> +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr) >> +{ >> + /* >> + * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors, >> + * we need to allocate some memory to catch those writes coming >> + * from msi_notify()/msix_notify(). >> + * As MSIMessage:addr is going to be the same and MSIMessage:data >> + * is going to be a VIRQ number, 4 bytes of the MSI MR will only >> + * be used. >> + */ >> + spapr->msi_win_addr = addr; >> + memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr, >> + "msi", getpagesize()); >> + memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr, >> + &spapr->msiwindow); >> +} >> + >> /* >> * PHB PCI device >> */ >> @@ -528,8 +542,7 @@ static int spapr_phb_init(SysBusDevice *s) >> >> if ((sphb->buid != -1) || (sphb->dma_liobn != -1) >> || (sphb->mem_win_addr != -1) >> - || (sphb->io_win_addr != -1) >> - || (sphb->msi_win_addr != -1)) { >> + || (sphb->io_win_addr != -1)) { >> fprintf(stderr, "Either \"index\" or other parameters must" >> " be specified for PAPR PHB, not both\n"); >> return -1; >> @@ -542,7 +555,6 @@ static int spapr_phb_init(SysBusDevice *s) >> + sphb->index * SPAPR_PCI_WINDOW_SPACING; >> sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF; >> sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF; >> - sphb->msi_win_addr = windows_base + SPAPR_PCI_MSI_WIN_OFF; >> } >> >> if (sphb->buid == -1) { >> @@ -565,11 +577,6 @@ static int spapr_phb_init(SysBusDevice *s) >> return -1; >> } >> >> - if (sphb->msi_win_addr == -1) { >> - fprintf(stderr, "MSI window address not specified for PHB\n"); >> - return -1; >> - } >> - >> if (find_phb(spapr, sphb->buid)) { >> fprintf(stderr, "PCI host bridges must have unique BUIDs\n"); >> return -1; >> @@ -609,18 +616,6 @@ static int spapr_phb_init(SysBusDevice *s) >> namebuf, SPAPR_PCI_IO_WIN_SIZE); >> memory_region_add_subregion(get_system_memory(), sphb->io_win_addr, >> &sphb->iowindow); >> - >> - /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors, >> - * we need to allocate some memory to catch those writes coming >> - * from msi_notify()/msix_notify() */ >> - if (msi_supported) { >> - sprintf(namebuf, "%s.msi", sphb->dtbusname); >> - memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), >> &spapr_msi_ops, sphb, >> - namebuf, SPAPR_MSIX_MAX_DEVS * 0x10000); >> - memory_region_add_subregion(get_system_memory(), sphb->msi_win_addr, >> - &sphb->msiwindow); >> - } >> - >> /* >> * Selecting a busname is more complex than you'd think, due to >> * interacting constraints. If the user has specified an id >> @@ -695,7 +690,6 @@ static Property spapr_phb_properties[] = { >> DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, -1), >> DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, >> SPAPR_PCI_IO_WIN_SIZE), >> - DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, -1), >> DEFINE_PROP_END_OF_LIST(), >> }; >> >> @@ -737,7 +731,6 @@ static const VMStateDescription vmstate_spapr_pci = { >> VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState), >> VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState), >> VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState), >> - VMSTATE_UINT64_EQUAL(msi_win_addr, sPAPRPHBState), >> VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0, >> vmstate_spapr_pci_lsi, struct spapr_pci_lsi), >> VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, >> 0, >> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h >> index 93f9511..970b4a9 100644 >> --- a/include/hw/pci-host/spapr.h >> +++ b/include/hw/pci-host/spapr.h >> @@ -43,8 +43,7 @@ typedef struct sPAPRPHBState { >> >> MemoryRegion memspace, iospace; >> hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size; >> - hwaddr msi_win_addr; >> - MemoryRegion memwindow, iowindow, msiwindow; >> + MemoryRegion memwindow, iowindow; >> >> uint32_t dma_liobn; >> uint64_t dma_window_start; >> @@ -73,7 +72,8 @@ typedef struct sPAPRPHBState { >> #define SPAPR_PCI_MMIO_WIN_SIZE 0x20000000 >> #define SPAPR_PCI_IO_WIN_OFF 0x80000000 >> #define SPAPR_PCI_IO_WIN_SIZE 0x10000 >> -#define SPAPR_PCI_MSI_WIN_OFF 0x90000000 >> + >> +#define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL >> >> #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL >> >> @@ -88,6 +88,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, >> uint32_t xics_phandle, >> void *fdt); >> >> +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr); >> + >> void spapr_pci_rtas_init(void); >> >> #endif /* __HW_SPAPR_PCI_H__ */ >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h >> index 7aa9a3d..aed02e1 100644 >> --- a/include/hw/ppc/spapr.h >> +++ b/include/hw/ppc/spapr.h >> @@ -14,6 +14,8 @@ struct icp_state; >> typedef struct sPAPREnvironment { >> struct VIOsPAPRBus *vio_bus; >> QLIST_HEAD(, sPAPRPHBState) phbs; >> + hwaddr msi_win_addr; >> + MemoryRegion msiwindow; >> struct sPAPRNVRAM *nvram; >> struct icp_state *icp; >> >> @@ -303,7 +305,7 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, >> target_ulong opcode, >> target_ulong *args); >> >> int spapr_allocate_irq(int hint, bool lsi); >> -int spapr_allocate_irq_block(int num, bool lsi); >> +int spapr_allocate_irq_block(int num, bool lsi, bool msi); >> >> static inline int spapr_allocate_msi(int hint) >> { >> > > -- Alexey