This proposal introduces a new IRQ number space layout using static
numbers for all devices, depending on a device index, and a bitmap
allocator for the MSI IRQ numbers which are negotiated by the guest at
runtime.

As the VIO device model does not have a device index but a "reg"
property, we introduce a formula to compute an IRQ number from a "reg"
value. It should minimize most of the collisions.

The previous layout is kept in pre-3.1 machines raising the
'legacy_irq_allocation' machine class flag.

Signed-off-by: Cédric Le Goater <c...@kaod.org>
---
 include/hw/ppc/spapr.h     |  5 +++
 include/hw/ppc/spapr_irq.h | 32 +++++++++++++++++++
 hw/ppc/spapr.c             | 32 ++++++++++++++++++-
 hw/ppc/spapr_events.c      | 12 ++++---
 hw/ppc/spapr_irq.c         | 56 ++++++++++++++++++++++++++++++++
 hw/ppc/spapr_pci.c         | 29 +++++++++++++----
 hw/ppc/spapr_vio.c         | 65 ++++++++++++++++++++++++++++++++++----
 hw/ppc/Makefile.objs       |  2 +-
 8 files changed, 214 insertions(+), 19 deletions(-)
 create mode 100644 include/hw/ppc/spapr_irq.h
 create mode 100644 hw/ppc/spapr_irq.c

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7e5de1a6fd42..73067f5ee8aa 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -8,6 +8,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "hw/mem/pc-dimm.h"
 #include "hw/ppc/spapr_ovec.h"
+#include "hw/ppc/spapr_irq.h"
 
 struct VIOsPAPRBus;
 struct sPAPRPHBState;
@@ -101,6 +102,8 @@ struct sPAPRMachineClass {
     bool dr_lmb_enabled;       /* enable dynamic-reconfig/hotplug of LMBs */
     bool use_ohci_by_default;  /* use USB-OHCI instead of XHCI */
     bool pre_2_10_has_unused_icps;
+    bool legacy_irq_allocation;
+
     void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index,
                           uint64_t *buid, hwaddr *pio, 
                           hwaddr *mmio32, hwaddr *mmio64,
@@ -167,6 +170,8 @@ struct sPAPRMachineState {
     char *kvm_type;
 
     const char *icp_type;
+    int32_t irq_map_nr;
+    unsigned long *irq_map;
 
     bool cmd_line_caps[SPAPR_CAP_NUM];
     sPAPRCapabilities def, eff, mig;
diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
new file mode 100644
index 000000000000..6f7f50548809
--- /dev/null
+++ b/include/hw/ppc/spapr_irq.h
@@ -0,0 +1,32 @@
+/*
+ * QEMU PowerPC sPAPR IRQ backend definitions
+ *
+ * Copyright (c) 2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef HW_SPAPR_IRQ_H
+#define HW_SPAPR_IRQ_H
+
+/*
+ * IRQ range offsets per device type
+ */
+#define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
+#define SPAPR_IRQ_HOTPLUG    0x1001
+#define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
+#define SPAPR_IRQ_PCI_LSI    0x1200  /* 32+ PHBs devices */
+
+#define SPAPR_IRQ_MSI        0x1300  /* Offset of the dynamic range covered
+                                      * by the bitmap allocator */
+
+typedef struct sPAPRMachineState sPAPRMachineState;
+
+void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis);
+int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align,
+                        Error **errp);
+void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num);
+void spapr_irq_msi_reset(sPAPRMachineState *spapr);
+
+#endif
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 421b2dd09b51..1794952d4e2a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -189,6 +189,11 @@ static void xics_system_init(MachineState *machine, int 
nr_irqs, Error **errp)
     sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
     Error *local_err = NULL;
 
+    /* Initialize the MSI IRQ allocator. */
+    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        spapr_irq_msi_init(spapr, XICS_IRQ_BASE + nr_irqs - SPAPR_IRQ_MSI);
+    }
+
     if (kvm_enabled()) {
         if (machine_kernel_irqchip_allowed(machine) &&
             !xics_kvm_init(spapr, &local_err)) {
@@ -1636,6 +1641,10 @@ static void spapr_machine_reset(void)
         ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal);
     }
 
+    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        spapr_irq_msi_reset(spapr);
+    }
+
     qemu_devices_reset();
 
     /* DRC reset may cause a device to be unplugged. This will cause troubles
@@ -1910,6 +1919,24 @@ static const VMStateDescription vmstate_spapr_patb_entry 
= {
     },
 };
 
+static bool spapr_irq_map_needed(void *opaque)
+{
+    sPAPRMachineState *spapr = opaque;
+
+    return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->irq_map_nr);
+}
+
+static const VMStateDescription vmstate_spapr_irq_map = {
+    .name = "spapr_irq_map",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_irq_map_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, irq_map_nr),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_spapr = {
     .name = "spapr",
     .version_id = 3,
@@ -1937,6 +1964,7 @@ static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_cap_cfpc,
         &vmstate_spapr_cap_sbbc,
         &vmstate_spapr_cap_ibs,
+        &vmstate_spapr_irq_map,
         NULL
     }
 };
@@ -4067,7 +4095,9 @@ static void 
spapr_machine_3_0_instance_options(MachineState *machine)
 
 static void spapr_machine_3_0_class_options(MachineClass *mc)
 {
-    /* Defaults for the latest behaviour inherited from the base class */
+    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    smc->legacy_irq_allocation = true;
 }
 
 DEFINE_SPAPR_MACHINE(3_0, "3.0", true);
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index e4f5946a2188..32719a1b72d0 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -707,9 +707,11 @@ void spapr_clear_pending_events(sPAPRMachineState *spapr)
 
 void spapr_events_init(sPAPRMachineState *spapr)
 {
-    int epow_irq;
+    int epow_irq = SPAPR_IRQ_EPOW;
 
-    epow_irq = spapr_irq_findone(spapr, &error_fatal);
+    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        epow_irq = spapr_irq_findone(spapr, &error_fatal);
+    }
 
     spapr_irq_claim(spapr, epow_irq, false, &error_fatal);
 
@@ -729,9 +731,11 @@ void spapr_events_init(sPAPRMachineState *spapr)
      * checking that it's enabled.
      */
     if (spapr->use_hotplug_event_source) {
-        int hp_irq;
+        int hp_irq = SPAPR_IRQ_HOTPLUG;
 
-        hp_irq = spapr_irq_findone(spapr, &error_fatal);
+        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+            hp_irq = spapr_irq_findone(spapr, &error_fatal);
+        }
 
         spapr_irq_claim(spapr, hp_irq, false, &error_fatal);
 
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
new file mode 100644
index 000000000000..24e9c1d4433c
--- /dev/null
+++ b/hw/ppc/spapr_irq.c
@@ -0,0 +1,56 @@
+/*
+ * QEMU PowerPC sPAPR IRQ interface
+ *
+ * Copyright (c) 2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/xics.h"
+
+void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis)
+{
+    spapr->irq_map_nr = nr_msis;
+    spapr->irq_map = bitmap_new(spapr->irq_map_nr);
+}
+
+int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align,
+                        Error **errp)
+{
+    int irq;
+
+    /*
+     * The 'align_mask' parameter of bitmap_find_next_zero_area()
+     * should be one less than a power of 2; 0 means no
+     * alignment. Adapt the 'align' value of the former allocator
+     * to fit the requirements of bitmap_find_next_zero_area()
+     */
+    align -= 1;
+
+    irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
+                                     align);
+    if (irq == spapr->irq_map_nr) {
+        error_setg(errp, "can't find a free %d-IRQ block", num);
+        return -1;
+    }
+
+    bitmap_set(spapr->irq_map, irq, num);
+
+    return irq + SPAPR_IRQ_MSI;
+}
+
+void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num)
+{
+    bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
+}
+
+void spapr_irq_msi_reset(sPAPRMachineState *spapr)
+{
+    bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr);
+}
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 497b896c7d24..3791ced6c536 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -334,6 +334,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
             return;
         }
 
+        if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
+        }
         spapr_irq_free(spapr, msi->first_irq, msi->num);
         if (msi_present(pdev)) {
             spapr_msi_setmsg(pdev, 0, false, 0, 0);
@@ -372,7 +375,13 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
     }
 
     /* Allocate MSIs */
-    irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, &err);
+    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
+                             &err);
+    } else {
+        irq = spapr_irq_msi_alloc(spapr, req_num,
+                                  ret_intr_type == RTAS_TYPE_MSI, &err);
+    }
     if (err) {
         error_reportf_err(err, "Can't allocate MSIs for device %x: ",
                           config_addr);
@@ -392,6 +401,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 
     /* Release previous MSIs */
     if (msi) {
+        if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
+        }
         spapr_irq_free(spapr, msi->first_irq, msi->num);
         g_hash_table_remove(phb->msi, &config_addr);
     }
@@ -1705,14 +1717,16 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 
     /* Initialize the LSI table */
     for (i = 0; i < PCI_NUM_PINS; i++) {
-        uint32_t irq;
+        uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
         Error *local_err = NULL;
 
-        irq = spapr_irq_findone(spapr, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            error_prepend(errp, "can't allocate LSIs: ");
-            return;
+        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+            irq = spapr_irq_findone(spapr, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                error_prepend(errp, "can't allocate LSIs: ");
+                return;
+            }
         }
 
         spapr_irq_claim(spapr, irq, true, &local_err);
@@ -2123,6 +2137,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
     _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
     _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
+    /* TODO: fine tune the total count of allocatable MSIs per PHB */
     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR));
 
     /* Dynamic DMA window */
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index be9af71437cc..087862d3773a 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -37,12 +37,13 @@
 
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/spapr_vio.h"
-#include "hw/ppc/xics.h"
 #include "hw/ppc/fdt.h"
 #include "trace.h"
 
 #include <libfdt.h>
 
+#define SPAPR_VIO_REG_BASE 0x71000000
+
 static void spapr_vio_get_irq(Object *obj, Visitor *v, const char *name,
                               void *opaque, Error **errp)
 {
@@ -445,6 +446,54 @@ static void spapr_vio_busdev_reset(DeviceState *qdev)
     }
 }
 
+/*
+ * The register property of a VIO device is defined in livirt using
+ * 0x1000 as a base register number plus a 0x1000 increment. For the
+ * VIO tty device, the base number is changed to 0x30000000. QEMU uses
+ * a base register number of 0x71000000 and then a simple increment.
+ *
+ * The formula below tries to compute a unique index number from the
+ * register value that will be used to define the IRQ number of the
+ * VIO device.
+ *
+ * A maximum of 256 VIO devices is covered. Collisions are possible
+ * but they will be detected when the IRQ is claimed.
+ */
+static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg)
+{
+    uint32_t irq;
+
+    if (reg >= SPAPR_VIO_REG_BASE) {
+        /*
+         * VIO device register values when allocated by QEMU. For
+         * these, we simply mask the high bits to fit the overall
+         * range: [0x00 - 0xff].
+         *
+         * The nvram VIO device (reg=0x71000000) is a static device of
+         * the pseries machine and so is always allocated by QEMU. Its
+         * IRQ number is 0x0.
+         */
+        irq = reg & 0xff;
+
+    } else if (reg >= 0x30000000) {
+        /*
+         * VIO tty devices register values, when allocated by livirt,
+         * are mapped in range [0xf0 - 0xff], gives us a maximum of 16
+         * vtys.
+         */
+        irq = 0xf0 | ((reg >> 12) & 0xf);
+
+    } else {
+        /*
+         * Other VIO devices register values, when allocated by
+         * livirt, are mapped in range [0x00 - 0xef].
+         */
+        irq = (reg >> 12) & 0xef;
+    }
+
+    return SPAPR_IRQ_VIO | irq;
+}
+
 static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
@@ -485,10 +534,14 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, 
Error **errp)
     }
 
     if (!dev->irq) {
-        dev->irq = spapr_irq_findone(spapr, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
+        dev->irq = spapr_vio_reg_to_irq(dev->reg);
+
+        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+            dev->irq = spapr_irq_findone(spapr, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
+            }
         }
     }
 
@@ -557,7 +610,7 @@ VIOsPAPRBus *spapr_vio_bus_init(void)
     /* Create bus on bridge device */
     qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio");
     bus = SPAPR_VIO_BUS(qbus);
-    bus->next_reg = 0x71000000;
+    bus->next_reg = SPAPR_VIO_REG_BASE;
 
     /* hcall-vio */
     spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index bcab6323b7ed..4ab556467289 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,7 +4,7 @@ obj-y += ppc.o ppc_booke.o fdt.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
-obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o
+obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o
 # IBM PowerNV
 obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o 
pnv_occ.o pnv_bmc.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
-- 
2.17.1


Reply via email to