On Sun, 29 Oct 2017 19:12:10 +0100 Cédric Le Goater <c...@kaod.org> wrote:
> Currently, the ICSState 'ics' object of the sPAPR machine acts as the > global interrupt source handler and also as the IRQ number allocator > for the machine. Some IRQ numbers are allocated very early in the > machine initialization sequence to populate the device tree, and this > is a problem to introduce the new POWER XIVE interrupt model, as it > needs to share the IRQ numbers with the older model. > > To prepare ground for XIVE, here is a proposal adding a set of new > XICSFabric operations to let the machine handle directly the IRQ > number allocation and to decorrelate the allocation from the interrupt > source object : > > bool (*irq_test)(XICSFabric *xi, int irq); > int (*irq_alloc_block)(XICSFabric *xi, int count, int align); > void (*irq_free_block)(XICSFabric *xi, int irq, int num); > > In these prototypes, the 'irq' parameter refers to a number in the > global IRQ number space. Indexes for arrays storing different state > informations on the interrupts, like the ICSIRQState, are named > 'srcno'. > > On the sPAPR platform, these IRQ operations are simply backed by a > bitmap 'irq_map' in the machine. > > 'irq_base' is a base number in sync with the ICSState 'offset'. It > lets us allocate only the subset of the IRQ numbers used on the sPAPR > platform but we could also choose to waste some extra bytes (512) and > allocate the whole number space. 'nr_irqs' is the total number of > IRQs, required to manipulate the bitmap. > > Signed-off-by: Cédric Le Goater <c...@kaod.org> > --- Hi, The idea makes sense but this patch brings too many changes IMHO. > hw/intc/xics.c | 3 ++- > hw/intc/xics_spapr.c | 57 ++++++++++++---------------------------------- > hw/ppc/spapr.c | 62 > +++++++++++++++++++++++++++++++++++++++++++++++++- > include/hw/ppc/spapr.h | 4 ++++ > include/hw/ppc/xics.h | 4 ++++ > 5 files changed, 85 insertions(+), 45 deletions(-) > > diff --git a/hw/intc/xics.c b/hw/intc/xics.c > index cc9816e7f204..2c4899f278e2 100644 > --- a/hw/intc/xics.c > +++ b/hw/intc/xics.c > @@ -53,6 +53,7 @@ void icp_pic_print_info(ICPState *icp, Monitor *mon) > void ics_pic_print_info(ICSState *ics, Monitor *mon) > { > uint32_t i; > + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics); > > monitor_printf(mon, "ICS %4x..%4x %p\n", > ics->offset, ics->offset + ics->nr_irqs - 1, ics); > @@ -64,7 +65,7 @@ void ics_pic_print_info(ICSState *ics, Monitor *mon) > for (i = 0; i < ics->nr_irqs; i++) { > ICSIRQState *irq = ics->irqs + i; > > - if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) { > + if (!xic->irq_test(ics->xics, i + ics->offset)) { > continue; > } > monitor_printf(mon, " %4x %s %02x %02x\n", > diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c > index d98ea8b13068..f2e20bca5b2e 100644 > --- a/hw/intc/xics_spapr.c > +++ b/hw/intc/xics_spapr.c > @@ -245,50 +245,23 @@ void xics_spapr_init(sPAPRMachineState *spapr) > spapr_register_hypercall(H_IPOLL, h_ipoll); > } > > -#define ICS_IRQ_FREE(ics, srcno) \ > - (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK))) > - > -static int ics_find_free_block(ICSState *ics, int num, int alignnum) > -{ > - int first, i; > - > - for (first = 0; first < ics->nr_irqs; first += alignnum) { > - if (num > (ics->nr_irqs - first)) { > - return -1; > - } > - for (i = first; i < first + num; ++i) { > - if (!ICS_IRQ_FREE(ics, i)) { > - break; > - } > - } > - if (i == (first + num)) { > - return first; > - } > - } > - > - return -1; > -} > - > int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp) > { > int irq; > + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics); > > - if (!ics) { > - return -1; > - } If spapr_ics_alloc() is never called with ics == NULL, then you should assert. Also, this looks like this deserves a separate patch. > if (irq_hint) { > - if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) { > + if (xic->irq_test(ics->xics, irq_hint)) { > error_setg(errp, "can't allocate IRQ %d: already in use", > irq_hint); > return -1; > } > irq = irq_hint; > } else { > - irq = ics_find_free_block(ics, 1, 1); > + irq = xic->irq_alloc_block(ics->xics, 1, 0); > if (irq < 0) { > error_setg(errp, "can't allocate IRQ: no IRQ left"); > return -1; > } > - irq += ics->offset; > } > > ics_set_irq_type(ics, irq - ics->offset, lsi); > @@ -305,10 +278,8 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool > lsi, > bool align, Error **errp) > { > int i, first = -1; > + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics); > > - if (!ics) { > - return -1; > - } > Ditto. > /* > * MSIMesage::data is used for storing VIRQ so > @@ -320,9 +291,9 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool > lsi, > if (align) { > assert((num == 1) || (num == 2) || (num == 4) || > (num == 8) || (num == 16) || (num == 32)); > - first = ics_find_free_block(ics, num, num); > + first = xic->irq_alloc_block(ics->xics, num, num); > } else { > - first = ics_find_free_block(ics, num, 1); > + first = xic->irq_alloc_block(ics->xics, num, 0); > } > if (first < 0) { > error_setg(errp, "can't find a free %d-IRQ block", num); > @@ -331,25 +302,25 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool > lsi, > > if (first >= 0) { It looks like this check isn't needed since we return in the first < 0 case. Maybe you can fix this in a preliminary patch ? > for (i = first; i < first + num; ++i) { > - ics_set_irq_type(ics, i, lsi); > + ics_set_irq_type(ics, i - ics->offset, lsi); > } > } > - first += ics->offset; > > trace_xics_alloc_block(first, num, lsi, align); > > return first; > } > > -static void ics_free(ICSState *ics, int srcno, int num) > +static void ics_free(ICSState *ics, int irq, int num) > { > int i; > + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics); > > - for (i = srcno; i < srcno + num; ++i) { > - if (ICS_IRQ_FREE(ics, i)) { > - trace_xics_ics_free_warn(0, i + ics->offset); > + for (i = irq; i < irq + num; ++i) { > + if (xic->irq_test(ics->xics, i)) { > + trace_xics_ics_free_warn(0, i); > } > - memset(&ics->irqs[i], 0, sizeof(ICSIRQState)); > + xic->irq_free_block(ics->xics, i, 1); > } > } > > @@ -357,7 +328,7 @@ void spapr_ics_free(ICSState *ics, int irq, int num) > { > if (ics_valid_irq(ics, irq)) { > trace_xics_ics_free(0, irq, num); > - ics_free(ics, irq - ics->offset, num); > + ics_free(ics, irq, num); > } > } > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index d682f013d422..88da4bad2328 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -1681,6 +1681,24 @@ static const VMStateDescription > vmstate_spapr_patb_entry = { > }, > }; > > +static bool spapr_irq_map_needed(void *opaque) > +{ > + sPAPRMachineState *spapr = opaque; > + > + return find_first_bit(spapr->irq_map, spapr->nr_irqs) < spapr->nr_irqs; > +} This will allow migration from older QEMU. Maybe you can add a machine property so that the subsection is only generated for newer pseries, and you'll support migration to older QEMU (see details at the end of === Subsections === in docs/devel/migration.txt). > + > +static const VMStateDescription vmstate_spapr_irq_map = { > + .name = "spapr_irq_map", > + .version_id = 0, > + .minimum_version_id = 0, > + .needed = spapr_irq_map_needed, > + .fields = (VMStateField[]) { > + VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, nr_irqs), > + VMSTATE_END_OF_LIST() > + }, > +}; > + > static const VMStateDescription vmstate_spapr = { > .name = "spapr", > .version_id = 3, > @@ -1700,6 +1718,7 @@ static const VMStateDescription vmstate_spapr = { > &vmstate_spapr_ov5_cas, > &vmstate_spapr_patb_entry, > &vmstate_spapr_pending_events, > + &vmstate_spapr_irq_map, > NULL > } > }; > @@ -2337,8 +2356,13 @@ static void ppc_spapr_init(MachineState *machine) > /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */ > load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; > > + /* Initialize the IRQ allocator */ > + spapr->nr_irqs = XICS_IRQS_SPAPR; > + spapr->irq_map = bitmap_new(spapr->nr_irqs); > + spapr->irq_base = XICS_IRQ_BASE; > + > /* Set up Interrupt Controller before we create the VCPUs */ > - xics_system_init(machine, XICS_IRQS_SPAPR, &error_fatal); > + xics_system_init(machine, spapr->nr_irqs, &error_fatal); > > /* Set up containers for ibm,client-architecture-support negotiated > options > */ > @@ -3536,6 +3560,38 @@ static ICPState *spapr_icp_get(XICSFabric *xi, int > vcpu_id) > return cpu ? ICP(cpu->intc) : NULL; > } > > +static bool spapr_irq_test(XICSFabric *xi, int irq) > +{ > + sPAPRMachineState *spapr = SPAPR_MACHINE(xi); > + int srcno = irq - spapr->irq_base; > + > + return test_bit(srcno, spapr->irq_map); > +} > + > +static int spapr_irq_alloc_block(XICSFabric *xi, int count, int align) > +{ > + sPAPRMachineState *spapr = SPAPR_MACHINE(xi); > + int start = 0; > + int srcno; > + > + srcno = bitmap_find_next_zero_area(spapr->irq_map, spapr->nr_irqs, start, > + count, align); > + if (srcno == spapr->nr_irqs) { > + return -1; > + } > + > + bitmap_set(spapr->irq_map, srcno, count); > + return srcno + spapr->irq_base; > +} > + > +static void spapr_irq_free_block(XICSFabric *xi, int irq, int num) > +{ > + sPAPRMachineState *spapr = SPAPR_MACHINE(xi); > + int srcno = irq - spapr->irq_base; > + > + bitmap_clear(spapr->irq_map, srcno, num); > +} > + > static void spapr_pic_print_info(InterruptStatsProvider *obj, > Monitor *mon) > { > @@ -3630,6 +3686,10 @@ static void spapr_machine_class_init(ObjectClass *oc, > void *data) > xic->ics_get = spapr_ics_get; > xic->ics_resend = spapr_ics_resend; > xic->icp_get = spapr_icp_get; > + xic->irq_test = spapr_irq_test; > + xic->irq_alloc_block = spapr_irq_alloc_block; > + xic->irq_free_block = spapr_irq_free_block; > + > ispc->print_info = spapr_pic_print_info; > /* Force NUMA node memory size to be a multiple of > * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 9d21ca9bde3a..b962bfe09bb5 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -7,6 +7,7 @@ > #include "hw/ppc/spapr_drc.h" > #include "hw/mem/pc-dimm.h" > #include "hw/ppc/spapr_ovec.h" > +#include "qemu/bitmap.h" > > struct VIOsPAPRBus; > struct sPAPRPHBState; > @@ -78,6 +79,9 @@ struct sPAPRMachineState { > struct VIOsPAPRBus *vio_bus; > QLIST_HEAD(, sPAPRPHBState) phbs; > struct sPAPRNVRAM *nvram; > + int32_t nr_irqs; > + unsigned long *irq_map; > + uint32_t irq_base; > ICSState *ics; > sPAPRRTCState rtc; > > diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h > index 28d248abad61..30e7f2e0a7dd 100644 > --- a/include/hw/ppc/xics.h > +++ b/include/hw/ppc/xics.h > @@ -175,6 +175,10 @@ typedef struct XICSFabricClass { > ICSState *(*ics_get)(XICSFabric *xi, int irq); > void (*ics_resend)(XICSFabric *xi); > ICPState *(*icp_get)(XICSFabric *xi, int server); > + /* IRQ allocator helpers */ > + bool (*irq_test)(XICSFabric *xi, int irq); > + int (*irq_alloc_block)(XICSFabric *xi, int count, int align); > + void (*irq_free_block)(XICSFabric *xi, int irq, int num); API looks good to me. I suggest you introduce it in a dedicated patch, and change the allocator implementation in another patch. > } XICSFabricClass; > > #define XICS_IRQS_SPAPR 1024