On Wed, 2017-08-30 at 21:46 +0200, Cédric Le Goater wrote:
> This is the framework for using XIVE in a PowerVM guest. The support
> is very similar to the native one in a much simpler form.
> 
> Each source is associated with an Event State Buffer (ESB). This is a
> two bit state machine which is used to trigger events. The bits are
> named "P" (pending) and "Q" (queued) and can be controlled by MMIO.
> The Guest OS registers event (or notification) queues on which the HW
> will post event data for a target to notify.
> 
> Instead of OPAL calls, a set of Hypervisor calls is used to configure
> the interrupt sources and the event/notification queues of the guest:
> 
>  - H_INT_GET_SOURCE_INFO
> 
>    used to obtain the address of the MMIO page of the Event State
>    Buffer (PQ bits) entry associated with the source.
> 
>  - H_INT_SET_SOURCE_CONFIG
> 
>    assigns a source to a "target".
> 
>  - H_INT_GET_SOURCE_CONFIG
> 
>    determines which "target" and "priority" are assigned to a source
> 
>  - H_INT_GET_QUEUE_INFO
> 
>    returns the address of the notification management page associated
>    with the specified "target" and "priority".
> 
>  - H_INT_SET_QUEUE_CONFIG
> 
>    sets or resets the event queue for a given "target" and "priority".
>    It is also used to set the notification config associated with the
>    queue, only unconditional notification for the moment.  Reset is
>    performed with a queue size of 0 and queueing is disabled in that
>    case.
> 
>  - H_INT_GET_QUEUE_CONFIG
> 
>    returns the queue settings for a given "target" and "priority".
> 
>  - H_INT_RESET
> 
>    resets all of the partition's interrupt exploitation structures to
>    their initial state, losing all configuration set via the hcalls
>    H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
> 
>  - H_INT_SYNC
> 
>    issues a synchronisation on a source to make sure all
>    notifications have reached their queue.
> 
> As for XICS, the XIVE interface for the guest is described in the
> device tree under the "interrupt-controller" node. A couple of new
> properties are specific to XIVE :
> 
>  - "reg"
> 
>    contains the base address and size of the thread interrupt
>    management areas (TIMA), also called rings, for the User level and
>    for the Guest OS level. Only the Guest OS level is taken into
>    account today.
> 
>  - "ibm,xive-eq-sizes"
> 
>    the size of the event queues. One cell per size supported, contains
>    log2 of size, in ascending order.
> 
>  - "ibm,xive-lisn-ranges"
> 
>    the interrupt numbers ranges assigned to the guest. These are
>    allocated using a simple bitmap.
> 
> and also :
> 
>  - "/ibm,plat-res-int-priorities"
> 
>    contains a list of priorities that the hypervisor has reserved for
>    its own use.
> 
> Tested with a QEMU XIVE model for pseries and with the Power hypervisor.
> 
> Signed-off-by: Cédric Le Goater <c...@kaod.org>

Acked-by: Benjamin Herrenschmidt <b...@kernel.crashing.org>

> ---
> 
>  Changes since v2 :
> 
>  - changed H_INT_SYNC hcall to reflect new api
>  
>  Changes since v1 :
> 
>  - added a xive_teardown_cpu() routine
>  - removed P9 doorbell support when xive is enabled.
>  - merged in patch for "ibm,plat-res-int-priorities" support
>  - added some comments on the usage of raw I/O accessors.
>  
>  Changes since RFC :
> 
>  - renamed backend to spapr
>  - fixed hotplug support
>  - fixed kexec support
>  - fixed src_chip value (XIVE_INVALID_CHIP_ID)
>  - added doorbell support 
>  - added some hcall debug logs
> 
>  arch/powerpc/include/asm/hvcall.h            |  13 +-
>  arch/powerpc/include/asm/xive.h              |   3 +
>  arch/powerpc/platforms/pseries/Kconfig       |   1 +
>  arch/powerpc/platforms/pseries/hotplug-cpu.c |  11 +-
>  arch/powerpc/platforms/pseries/kexec.c       |   6 +-
>  arch/powerpc/platforms/pseries/setup.c       |   8 +-
>  arch/powerpc/platforms/pseries/smp.c         |  27 +-
>  arch/powerpc/sysdev/xive/Kconfig             |   5 +
>  arch/powerpc/sysdev/xive/Makefile            |   1 +
>  arch/powerpc/sysdev/xive/common.c            |  13 +
>  arch/powerpc/sysdev/xive/spapr.c             | 618 
> +++++++++++++++++++++++++++
>  11 files changed, 698 insertions(+), 8 deletions(-)
>  create mode 100644 arch/powerpc/sysdev/xive/spapr.c
> 
> diff --git a/arch/powerpc/include/asm/hvcall.h 
> b/arch/powerpc/include/asm/hvcall.h
> index 57d38b504ff7..3d34dc0869f6 100644
> --- a/arch/powerpc/include/asm/hvcall.h
> +++ b/arch/powerpc/include/asm/hvcall.h
> @@ -280,7 +280,18 @@
>  #define H_RESIZE_HPT_COMMIT  0x370
>  #define H_REGISTER_PROC_TBL  0x37C
>  #define H_SIGNAL_SYS_RESET   0x380
> -#define MAX_HCALL_OPCODE     H_SIGNAL_SYS_RESET
> +#define H_INT_GET_SOURCE_INFO   0x3A8
> +#define H_INT_SET_SOURCE_CONFIG 0x3AC
> +#define H_INT_GET_SOURCE_CONFIG 0x3B0
> +#define H_INT_GET_QUEUE_INFO    0x3B4
> +#define H_INT_SET_QUEUE_CONFIG  0x3B8
> +#define H_INT_GET_QUEUE_CONFIG  0x3BC
> +#define H_INT_SET_OS_REPORTING_LINE 0x3C0
> +#define H_INT_GET_OS_REPORTING_LINE 0x3C4
> +#define H_INT_ESB               0x3C8
> +#define H_INT_SYNC              0x3CC
> +#define H_INT_RESET             0x3D0
> +#define MAX_HCALL_OPCODE     H_INT_RESET
>  
>  /* H_VIOCTL functions */
>  #define H_GET_VIOA_DUMP_SIZE 0x01
> diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
> index c23ff4389ca2..473f133a8555 100644
> --- a/arch/powerpc/include/asm/xive.h
> +++ b/arch/powerpc/include/asm/xive.h
> @@ -110,11 +110,13 @@ extern bool __xive_enabled;
>  
>  static inline bool xive_enabled(void) { return __xive_enabled; }
>  
> +extern bool xive_spapr_init(void);
>  extern bool xive_native_init(void);
>  extern void xive_smp_probe(void);
>  extern int  xive_smp_prepare_cpu(unsigned int cpu);
>  extern void xive_smp_setup_cpu(void);
>  extern void xive_smp_disable_cpu(void);
> +extern void xive_teardown_cpu(void);
>  extern void xive_kexec_teardown_cpu(int secondary);
>  extern void xive_shutdown(void);
>  extern void xive_flush_interrupt(void);
> @@ -147,6 +149,7 @@ extern int xive_native_get_vp_info(u32 vp_id, u32 
> *out_cam_id, u32 *out_chip_id)
>  
>  static inline bool xive_enabled(void) { return false; }
>  
> +static inline bool xive_spapr_init(void) { return false; }
>  static inline bool xive_native_init(void) { return false; }
>  static inline void xive_smp_probe(void) { }
>  extern inline int  xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
> diff --git a/arch/powerpc/platforms/pseries/Kconfig 
> b/arch/powerpc/platforms/pseries/Kconfig
> index 3a6dfd14f64b..71dd69d9ec64 100644
> --- a/arch/powerpc/platforms/pseries/Kconfig
> +++ b/arch/powerpc/platforms/pseries/Kconfig
> @@ -7,6 +7,7 @@ config PPC_PSERIES
>       select PCI
>       select PCI_MSI
>       select PPC_XICS
> +     select PPC_XIVE_SPAPR
>       select PPC_ICP_NATIVE
>       select PPC_ICP_HV
>       select PPC_ICS_RTAS
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 6afd1efd3633..175230e80766 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -34,6 +34,7 @@
>  #include <asm/machdep.h>
>  #include <asm/vdso_datapage.h>
>  #include <asm/xics.h>
> +#include <asm/xive.h>
>  #include <asm/plpar_wrappers.h>
>  
>  #include "pseries.h"
> @@ -109,7 +110,10 @@ static void pseries_mach_cpu_die(void)
>  
>       local_irq_disable();
>       idle_task_exit();
> -     xics_teardown_cpu();
> +     if (xive_enabled())
> +             xive_teardown_cpu();
> +     else
> +             xics_teardown_cpu();
>  
>       if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
>               set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
> @@ -174,7 +178,10 @@ static int pseries_cpu_disable(void)
>               boot_cpuid = cpumask_any(cpu_online_mask);
>  
>       /* FIXME: abstract this to not be platform specific later on */
> -     xics_migrate_irqs_away();
> +     if (xive_enabled())
> +             xive_smp_disable_cpu();
> +     else
> +             xics_migrate_irqs_away();
>       return 0;
>  }
>  
> diff --git a/arch/powerpc/platforms/pseries/kexec.c 
> b/arch/powerpc/platforms/pseries/kexec.c
> index 6681ac97fb18..eeb13429d685 100644
> --- a/arch/powerpc/platforms/pseries/kexec.c
> +++ b/arch/powerpc/platforms/pseries/kexec.c
> @@ -15,6 +15,7 @@
>  #include <asm/firmware.h>
>  #include <asm/kexec.h>
>  #include <asm/xics.h>
> +#include <asm/xive.h>
>  #include <asm/smp.h>
>  #include <asm/plpar_wrappers.h>
>  
> @@ -51,5 +52,8 @@ void pseries_kexec_cpu_down(int crash_shutdown, int 
> secondary)
>               }
>       }
>  
> -     xics_kexec_teardown_cpu(secondary);
> +     if (xive_enabled())
> +             xive_kexec_teardown_cpu(secondary);
> +     else
> +             xics_kexec_teardown_cpu(secondary);
>  }
> diff --git a/arch/powerpc/platforms/pseries/setup.c 
> b/arch/powerpc/platforms/pseries/setup.c
> index b5d86426e97b..a8531e012658 100644
> --- a/arch/powerpc/platforms/pseries/setup.c
> +++ b/arch/powerpc/platforms/pseries/setup.c
> @@ -57,6 +57,7 @@
>  #include <asm/nvram.h>
>  #include <asm/pmc.h>
>  #include <asm/xics.h>
> +#include <asm/xive.h>
>  #include <asm/ppc-pci.h>
>  #include <asm/i8259.h>
>  #include <asm/udbg.h>
> @@ -176,8 +177,11 @@ static void __init pseries_setup_i8259_cascade(void)
>  
>  static void __init pseries_init_irq(void)
>  {
> -     xics_init();
> -     pseries_setup_i8259_cascade();
> +     /* Try using a XIVE if available, otherwise use a XICS */
> +     if (!xive_spapr_init()) {
> +             xics_init();
> +             pseries_setup_i8259_cascade();
> +     }
>  }
>  
>  static void pseries_lpar_enable_pmcs(void)
> diff --git a/arch/powerpc/platforms/pseries/smp.c 
> b/arch/powerpc/platforms/pseries/smp.c
> index 24785f63fb40..2e184829e5d4 100644
> --- a/arch/powerpc/platforms/pseries/smp.c
> +++ b/arch/powerpc/platforms/pseries/smp.c
> @@ -41,6 +41,7 @@
>  #include <asm/vdso_datapage.h>
>  #include <asm/cputhreads.h>
>  #include <asm/xics.h>
> +#include <asm/xive.h>
>  #include <asm/dbell.h>
>  #include <asm/plpar_wrappers.h>
>  #include <asm/code-patching.h>
> @@ -136,7 +137,9 @@ static inline int smp_startup_cpu(unsigned int lcpu)
>  
>  static void smp_setup_cpu(int cpu)
>  {
> -     if (cpu != boot_cpuid)
> +     if (xive_enabled())
> +             xive_smp_setup_cpu();
> +     else if (cpu != boot_cpuid)
>               xics_setup_cpu();
>  
>       if (firmware_has_feature(FW_FEATURE_SPLPAR))
> @@ -181,6 +184,13 @@ static int smp_pSeries_kick_cpu(int nr)
>       return 0;
>  }
>  
> +static int pseries_smp_prepare_cpu(int cpu)
> +{
> +     if (xive_enabled())
> +             return xive_smp_prepare_cpu(cpu);
> +     return 0;
> +}
> +
>  static void smp_pseries_cause_ipi(int cpu)
>  {
>       /* POWER9 should not use this handler */
> @@ -211,7 +221,7 @@ static int pseries_cause_nmi_ipi(int cpu)
>       return 0;
>  }
>  
> -static __init void pSeries_smp_probe(void)
> +static __init void pSeries_smp_probe_xics(void)
>  {
>       xics_smp_probe();
>  
> @@ -221,11 +231,24 @@ static __init void pSeries_smp_probe(void)
>               smp_ops->cause_ipi = icp_ops->cause_ipi;
>  }
>  
> +static __init void pSeries_smp_probe(void)
> +{
> +     if (xive_enabled())
> +             /*
> +              * Don't use P9 doorbells when XIVE is enabled. IPIs
> +              * using MMIOs should be faster
> +              */
> +             xive_smp_probe();
> +     else
> +             pSeries_smp_probe_xics();
> +}
> +
>  static struct smp_ops_t pseries_smp_ops = {
>       .message_pass   = NULL, /* Use smp_muxed_ipi_message_pass */
>       .cause_ipi      = NULL, /* Filled at runtime by pSeries_smp_probe() */
>       .cause_nmi_ipi  = pseries_cause_nmi_ipi,
>       .probe          = pSeries_smp_probe,
> +     .prepare_cpu    = pseries_smp_prepare_cpu,
>       .kick_cpu       = smp_pSeries_kick_cpu,
>       .setup_cpu      = smp_setup_cpu,
>       .cpu_bootable   = smp_generic_cpu_bootable,
> diff --git a/arch/powerpc/sysdev/xive/Kconfig 
> b/arch/powerpc/sysdev/xive/Kconfig
> index 12ccd7373d2f..3e3e25b5e30d 100644
> --- a/arch/powerpc/sysdev/xive/Kconfig
> +++ b/arch/powerpc/sysdev/xive/Kconfig
> @@ -9,3 +9,8 @@ config PPC_XIVE_NATIVE
>       default n
>       select PPC_XIVE
>       depends on PPC_POWERNV
> +
> +config PPC_XIVE_SPAPR
> +     bool
> +     default n
> +     select PPC_XIVE
> diff --git a/arch/powerpc/sysdev/xive/Makefile 
> b/arch/powerpc/sysdev/xive/Makefile
> index 3fab303fc169..536d6e5706e3 100644
> --- a/arch/powerpc/sysdev/xive/Makefile
> +++ b/arch/powerpc/sysdev/xive/Makefile
> @@ -2,3 +2,4 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
>  
>  obj-y                                += common.o
>  obj-$(CONFIG_PPC_XIVE_NATIVE)        += native.o
> +obj-$(CONFIG_PPC_XIVE_SPAPR) += spapr.o
> diff --git a/arch/powerpc/sysdev/xive/common.c 
> b/arch/powerpc/sysdev/xive/common.c
> index 26999ceae20e..8774af7a4105 100644
> --- a/arch/powerpc/sysdev/xive/common.c
> +++ b/arch/powerpc/sysdev/xive/common.c
> @@ -1368,6 +1368,19 @@ void xive_flush_interrupt(void)
>  
>  #endif /* CONFIG_SMP */
>  
> +void xive_teardown_cpu(void)
> +{
> +     struct xive_cpu *xc = __this_cpu_read(xive_cpu);
> +     unsigned int cpu = smp_processor_id();
> +
> +     /* Set CPPR to 0 to disable flow of interrupts */
> +     xc->cppr = 0;
> +     out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
> +
> +     if (xive_ops->teardown_cpu)
> +             xive_ops->teardown_cpu(cpu, xc);
> +}
> +
>  void xive_kexec_teardown_cpu(int secondary)
>  {
>       struct xive_cpu *xc = __this_cpu_read(xive_cpu);
> diff --git a/arch/powerpc/sysdev/xive/spapr.c 
> b/arch/powerpc/sysdev/xive/spapr.c
> new file mode 100644
> index 000000000000..797bb0636ab7
> --- /dev/null
> +++ b/arch/powerpc/sysdev/xive/spapr.c
> @@ -0,0 +1,618 @@
> +/*
> + * Copyright 2016,2017 IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#define pr_fmt(fmt) "xive: " fmt
> +
> +#include <linux/types.h>
> +#include <linux/irq.h>
> +#include <linux/smp.h>
> +#include <linux/interrupt.h>
> +#include <linux/init.h>
> +#include <linux/of.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/cpumask.h>
> +#include <linux/mm.h>
> +
> +#include <asm/prom.h>
> +#include <asm/io.h>
> +#include <asm/smp.h>
> +#include <asm/irq.h>
> +#include <asm/errno.h>
> +#include <asm/xive.h>
> +#include <asm/xive-regs.h>
> +#include <asm/hvcall.h>
> +
> +#include "xive-internal.h"
> +
> +static u32 xive_queue_shift;
> +
> +struct xive_irq_bitmap {
> +     unsigned long           *bitmap;
> +     unsigned int            base;
> +     unsigned int            count;
> +     spinlock_t              lock;
> +     struct list_head        list;
> +};
> +
> +static LIST_HEAD(xive_irq_bitmaps);
> +
> +static int xive_irq_bitmap_add(int base, int count)
> +{
> +     struct xive_irq_bitmap *xibm;
> +
> +     xibm = kzalloc(sizeof(*xibm), GFP_ATOMIC);
> +     if (!xibm)
> +             return -ENOMEM;
> +
> +     spin_lock_init(&xibm->lock);
> +     xibm->base = base;
> +     xibm->count = count;
> +     xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL);
> +     list_add(&xibm->list, &xive_irq_bitmaps);
> +
> +     pr_info("Using IRQ range [%x-%x]", xibm->base,
> +             xibm->base + xibm->count - 1);
> +     return 0;
> +}
> +
> +static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm)
> +{
> +     int irq;
> +
> +     irq = find_first_zero_bit(xibm->bitmap, xibm->count);
> +     if (irq != xibm->count) {
> +             set_bit(irq, xibm->bitmap);
> +             irq += xibm->base;
> +     } else {
> +             irq = -ENOMEM;
> +     }
> +
> +     return irq;
> +}
> +
> +static int xive_irq_bitmap_alloc(void)
> +{
> +     struct xive_irq_bitmap *xibm;
> +     unsigned long flags;
> +     int irq = -ENOENT;
> +
> +     list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
> +             spin_lock_irqsave(&xibm->lock, flags);
> +             irq = __xive_irq_bitmap_alloc(xibm);
> +             spin_unlock_irqrestore(&xibm->lock, flags);
> +             if (irq >= 0)
> +                     break;
> +     }
> +     return irq;
> +}
> +
> +static void xive_irq_bitmap_free(int irq)
> +{
> +     unsigned long flags;
> +     struct xive_irq_bitmap *xibm;
> +
> +     list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
> +             if ((irq >= xibm->base) && (irq < xibm->base + xibm->count)) {
> +                     spin_lock_irqsave(&xibm->lock, flags);
> +                     clear_bit(irq - xibm->base, xibm->bitmap);
> +                     spin_unlock_irqrestore(&xibm->lock, flags);
> +                     break;
> +             }
> +     }
> +}
> +
> +static long plpar_int_get_source_info(unsigned long flags,
> +                                   unsigned long lisn,
> +                                   unsigned long *src_flags,
> +                                   unsigned long *eoi_page,
> +                                   unsigned long *trig_page,
> +                                   unsigned long *esb_shift)
> +{
> +     unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
> +     long rc;
> +
> +     rc = plpar_hcall(H_INT_GET_SOURCE_INFO, retbuf, flags, lisn);
> +     if (rc) {
> +             pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc);
> +             return rc;
> +     }
> +
> +     *src_flags = retbuf[0];
> +     *eoi_page  = retbuf[1];
> +     *trig_page = retbuf[2];
> +     *esb_shift = retbuf[3];
> +
> +     pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n",
> +             retbuf[0], retbuf[1], retbuf[2], retbuf[3]);
> +
> +     return 0;
> +}
> +
> +#define XIVE_SRC_SET_EISN (1ull << (63 - 62))
> +#define XIVE_SRC_MASK     (1ull << (63 - 63)) /* unused */
> +
> +static long plpar_int_set_source_config(unsigned long flags,
> +                                     unsigned long lisn,
> +                                     unsigned long target,
> +                                     unsigned long prio,
> +                                     unsigned long sw_irq)
> +{
> +     long rc;
> +
> +
> +     pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx 
> prio=%lx sw_irq=%lx\n",
> +             flags, lisn, target, prio, sw_irq);
> +
> +
> +     rc = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn,
> +                             target, prio, sw_irq);
> +     if (rc) {
> +             pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx 
> failed %ld\n",
> +                    lisn, target, prio, rc);
> +             return rc;
> +     }
> +
> +     return 0;
> +}
> +
> +static long plpar_int_get_queue_info(unsigned long flags,
> +                                  unsigned long target,
> +                                  unsigned long priority,
> +                                  unsigned long *esn_page,
> +                                  unsigned long *esn_size)
> +{
> +     unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
> +     long rc;
> +
> +     rc = plpar_hcall(H_INT_GET_QUEUE_INFO, retbuf, flags, target, priority);
> +     if (rc) {
> +             pr_err("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld failed %ld\n",
> +                    target, priority, rc);
> +             return rc;
> +     }
> +
> +     *esn_page = retbuf[0];
> +     *esn_size = retbuf[1];
> +
> +     pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n",
> +             retbuf[0], retbuf[1]);
> +
> +     return 0;
> +}
> +
> +#define XIVE_EQ_ALWAYS_NOTIFY (1ull << (63 - 63))
> +
> +static long plpar_int_set_queue_config(unsigned long flags,
> +                                    unsigned long target,
> +                                    unsigned long priority,
> +                                    unsigned long qpage,
> +                                    unsigned long qsize)
> +{
> +     long rc;
> +
> +     pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx 
> qpage=%lx qsize=%lx\n",
> +             flags,  target, priority, qpage, qsize);
> +
> +     rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
> +                             priority, qpage, qsize);
> +     if (rc) {
> +             pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx 
> returned %ld\n",
> +                    target, priority, qpage, rc);
> +             return  rc;
> +     }
> +
> +     return 0;
> +}
> +
> +static long plpar_int_sync(unsigned long flags, unsigned long lisn)
> +{
> +     long rc;
> +
> +     rc = plpar_hcall_norets(H_INT_SYNC, flags, lisn);
> +     if (rc) {
> +             pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc);
> +             return  rc;
> +     }
> +
> +     return 0;
> +}
> +
> +#define XIVE_SRC_H_INT_ESB     (1ull << (63 - 60)) /* TODO */
> +#define XIVE_SRC_LSI           (1ull << (63 - 61))
> +#define XIVE_SRC_TRIGGER       (1ull << (63 - 62))
> +#define XIVE_SRC_STORE_EOI     (1ull << (63 - 63))
> +
> +static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data 
> *data)
> +{
> +     long rc;
> +     unsigned long flags;
> +     unsigned long eoi_page;
> +     unsigned long trig_page;
> +     unsigned long esb_shift;
> +
> +     memset(data, 0, sizeof(*data));
> +
> +     rc = plpar_int_get_source_info(0, hw_irq, &flags, &eoi_page, &trig_page,
> +                                    &esb_shift);
> +     if (rc)
> +             return  -EINVAL;
> +
> +     if (flags & XIVE_SRC_STORE_EOI)
> +             data->flags  |= XIVE_IRQ_FLAG_STORE_EOI;
> +     if (flags & XIVE_SRC_LSI)
> +             data->flags  |= XIVE_IRQ_FLAG_LSI;
> +     data->eoi_page  = eoi_page;
> +     data->esb_shift = esb_shift;
> +     data->trig_page = trig_page;
> +
> +     /*
> +      * No chip-id for the sPAPR backend. This has an impact how we
> +      * pick a target. See xive_pick_irq_target().
> +      */
> +     data->src_chip = XIVE_INVALID_CHIP_ID;
> +
> +     data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
> +     if (!data->eoi_mmio) {
> +             pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
> +             return -ENOMEM;
> +     }
> +
> +     /* Full function page supports trigger */
> +     if (flags & XIVE_SRC_TRIGGER) {
> +             data->trig_mmio = data->eoi_mmio;
> +             return 0;
> +     }
> +
> +     data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
> +     if (!data->trig_mmio) {
> +             pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
> +             return -ENOMEM;
> +     }
> +     return 0;
> +}
> +
> +static int xive_spapr_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 
> sw_irq)
> +{
> +     long rc;
> +
> +     rc = plpar_int_set_source_config(XIVE_SRC_SET_EISN, hw_irq, target,
> +                                      prio, sw_irq);
> +
> +     return rc == 0 ? 0 : -ENXIO;
> +}
> +
> +/* This can be called multiple time to change a queue configuration */
> +static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
> +                                __be32 *qpage, u32 order)
> +{
> +     s64 rc = 0;
> +     unsigned long esn_page;
> +     unsigned long esn_size;
> +     u64 flags, qpage_phys;
> +
> +     /* If there's an actual queue page, clean it */
> +     if (order) {
> +             if (WARN_ON(!qpage))
> +                     return -EINVAL;
> +             qpage_phys = __pa(qpage);
> +     } else {
> +             qpage_phys = 0;
> +     }
> +
> +     /* Initialize the rest of the fields */
> +     q->msk = order ? ((1u << (order - 2)) - 1) : 0;
> +     q->idx = 0;
> +     q->toggle = 0;
> +
> +     rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
> +     if (rc) {
> +             pr_err("Error %lld getting queue info prio %d\n", rc, prio);
> +             rc = -EIO;
> +             goto fail;
> +     }
> +
> +     /* TODO: add support for the notification page */
> +     q->eoi_phys = esn_page;
> +
> +     /* Default is to always notify */
> +     flags = XIVE_EQ_ALWAYS_NOTIFY;
> +
> +     /* Configure and enable the queue in HW */
> +     rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
> +     if (rc) {
> +             pr_err("Error %lld setting queue for prio %d\n", rc, prio);
> +             rc = -EIO;
> +     } else {
> +             q->qpage = qpage;
> +     }
> +fail:
> +     return rc;
> +}
> +
> +static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
> +                               u8 prio)
> +{
> +     struct xive_q *q = &xc->queue[prio];
> +     __be32 *qpage;
> +
> +     qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
> +     if (IS_ERR(qpage))
> +             return PTR_ERR(qpage);
> +
> +     return xive_spapr_configure_queue(cpu, q, prio, qpage,
> +                                       xive_queue_shift);
> +}
> +
> +static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
> +                               u8 prio)
> +{
> +     struct xive_q *q = &xc->queue[prio];
> +     unsigned int alloc_order;
> +     long rc;
> +
> +     rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
> +     if (rc)
> +             pr_err("Error %ld setting queue for prio %d\n", rc, prio);
> +
> +     alloc_order = xive_alloc_order(xive_queue_shift);
> +     free_pages((unsigned long)q->qpage, alloc_order);
> +     q->qpage = NULL;
> +}
> +
> +static bool xive_spapr_match(struct device_node *node)
> +{
> +     /* Ignore cascaded controllers for the moment */
> +     return 1;
> +}
> +
> +#ifdef CONFIG_SMP
> +static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc)
> +{
> +     int irq = xive_irq_bitmap_alloc();
> +
> +     if (irq < 0) {
> +             pr_err("Failed to allocate IPI on CPU %d\n", cpu);
> +             return -ENXIO;
> +     }
> +
> +     xc->hw_ipi = irq;
> +     return 0;
> +}
> +
> +static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc)
> +{
> +     xive_irq_bitmap_free(xc->hw_ipi);
> +}
> +#endif /* CONFIG_SMP */
> +
> +static void xive_spapr_shutdown(void)
> +{
> +     long rc;
> +
> +     rc = plpar_hcall_norets(H_INT_RESET, 0);
> +     if (rc)
> +             pr_err("H_INT_RESET failed %ld\n", rc);
> +}
> +
> +/*
> + * Perform an "ack" cycle on the current thread. Grab the pending
> + * active priorities and update the CPPR to the most favored one.
> + */
> +static void xive_spapr_update_pending(struct xive_cpu *xc)
> +{
> +     u8 nsr, cppr;
> +     u16 ack;
> +
> +     /*
> +      * Perform the "Acknowledge O/S to Register" cycle.
> +      *
> +      * Let's speedup the access to the TIMA using the raw I/O
> +      * accessor as we don't need the synchronisation routine of
> +      * the higher level ones
> +      */
> +     ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
> +
> +     /* Synchronize subsequent queue accesses */
> +     mb();
> +
> +     /*
> +      * Grab the CPPR and the "NSR" field which indicates the source
> +      * of the interrupt (if any)
> +      */
> +     cppr = ack & 0xff;
> +     nsr = ack >> 8;
> +
> +     if (nsr & TM_QW1_NSR_EO) {
> +             if (cppr == 0xff)
> +                     return;
> +             /* Mark the priority pending */
> +             xc->pending_prio |= 1 << cppr;
> +
> +             /*
> +              * A new interrupt should never have a CPPR less favored
> +              * than our current one.
> +              */
> +             if (cppr >= xc->cppr)
> +                     pr_err("CPU %d odd ack CPPR, got %d at %d\n",
> +                            smp_processor_id(), cppr, xc->cppr);
> +
> +             /* Update our idea of what the CPPR is */
> +             xc->cppr = cppr;
> +     }
> +}
> +
> +static void xive_spapr_eoi(u32 hw_irq)
> +{
> +     /* Not used */;
> +}
> +
> +static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
> +{
> +     /* Only some debug on the TIMA settings */
> +     pr_debug("(HW value: %08x %08x %08x)\n",
> +              in_be32(xive_tima + TM_QW1_OS + TM_WORD0),
> +              in_be32(xive_tima + TM_QW1_OS + TM_WORD1),
> +              in_be32(xive_tima + TM_QW1_OS + TM_WORD2));
> +}
> +
> +static void xive_spapr_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
> +{
> +     /* Nothing to do */;
> +}
> +
> +static void xive_spapr_sync_source(u32 hw_irq)
> +{
> +     /* Specs are unclear on what this is doing */
> +     plpar_int_sync(0, hw_irq);
> +}
> +
> +static const struct xive_ops xive_spapr_ops = {
> +     .populate_irq_data      = xive_spapr_populate_irq_data,
> +     .configure_irq          = xive_spapr_configure_irq,
> +     .setup_queue            = xive_spapr_setup_queue,
> +     .cleanup_queue          = xive_spapr_cleanup_queue,
> +     .match                  = xive_spapr_match,
> +     .shutdown               = xive_spapr_shutdown,
> +     .update_pending         = xive_spapr_update_pending,
> +     .eoi                    = xive_spapr_eoi,
> +     .setup_cpu              = xive_spapr_setup_cpu,
> +     .teardown_cpu           = xive_spapr_teardown_cpu,
> +     .sync_source            = xive_spapr_sync_source,
> +#ifdef CONFIG_SMP
> +     .get_ipi                = xive_spapr_get_ipi,
> +     .put_ipi                = xive_spapr_put_ipi,
> +#endif /* CONFIG_SMP */
> +     .name                   = "spapr",
> +};
> +
> +/*
> + * get max priority from "/ibm,plat-res-int-priorities"
> + */
> +static bool xive_get_max_prio(u8 *max_prio)
> +{
> +     struct device_node *rootdn;
> +     const __be32 *reg;
> +     u32 len;
> +     int prio, found;
> +
> +     rootdn = of_find_node_by_path("/");
> +     if (!rootdn) {
> +             pr_err("not root node found !\n");
> +             return false;
> +     }
> +
> +     reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len);
> +     if (!reg) {
> +             pr_err("Failed to read 'ibm,plat-res-int-priorities' 
> property\n");
> +             return false;
> +     }
> +
> +     if (len % (2 * sizeof(u32)) != 0) {
> +             pr_err("invalid 'ibm,plat-res-int-priorities' property\n");
> +             return false;
> +     }
> +
> +     /* HW supports priorities in the range [0-7] and 0xFF is a
> +      * wildcard priority used to mask. We scan the ranges reserved
> +      * by the hypervisor to find the lowest priority we can use.
> +      */
> +     found = 0xFF;
> +     for (prio = 0; prio < 8; prio++) {
> +             int reserved = 0;
> +             int i;
> +
> +             for (i = 0; i < len / (2 * sizeof(u32)); i++) {
> +                     int base  = be32_to_cpu(reg[2 * i]);
> +                     int range = be32_to_cpu(reg[2 * i + 1]);
> +
> +                     if (prio >= base && prio < base + range)
> +                             reserved++;
> +             }
> +
> +             if (!reserved)
> +                     found = prio;
> +     }
> +
> +     if (found == 0xFF) {
> +             pr_err("no valid priority found in 
> 'ibm,plat-res-int-priorities'\n");
> +             return false;
> +     }
> +
> +     *max_prio = found;
> +     return true;
> +}
> +
> +bool xive_spapr_init(void)
> +{
> +     struct device_node *np;
> +     struct resource r;
> +     void __iomem *tima;
> +     struct property *prop;
> +     u8 max_prio;
> +     u32 val;
> +     u32 len;
> +     const __be32 *reg;
> +     int i;
> +
> +     if (xive_cmdline_disabled)
> +             return false;
> +
> +     pr_devel("%s()\n", __func__);
> +     np = of_find_compatible_node(NULL, NULL, "ibm,power-ivpe");
> +     if (!np) {
> +             pr_devel("not found !\n");
> +             return false;
> +     }
> +     pr_devel("Found %s\n", np->full_name);
> +
> +     /* Resource 1 is the OS ring TIMA */
> +     if (of_address_to_resource(np, 1, &r)) {
> +             pr_err("Failed to get thread mgmnt area resource\n");
> +             return false;
> +     }
> +     tima = ioremap(r.start, resource_size(&r));
> +     if (!tima) {
> +             pr_err("Failed to map thread mgmnt area\n");
> +             return false;
> +     }
> +
> +     if (!xive_get_max_prio(&max_prio))
> +             return false;
> +
> +     /* Feed the IRQ number allocator with the ranges given in the DT */
> +     reg = of_get_property(np, "ibm,xive-lisn-ranges", &len);
> +     if (!reg) {
> +             pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n");
> +             return false;
> +     }
> +
> +     if (len % (2 * sizeof(u32)) != 0) {
> +             pr_err("invalid 'ibm,xive-lisn-ranges' property\n");
> +             return false;
> +     }
> +
> +     for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2)
> +             xive_irq_bitmap_add(be32_to_cpu(reg[0]),
> +                                 be32_to_cpu(reg[1]));
> +
> +     /* Iterate the EQ sizes and pick one */
> +     of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) {
> +             xive_queue_shift = val;
> +             if (val == PAGE_SHIFT)
> +                     break;
> +     }
> +
> +     /* Initialize XIVE core with our backend */
> +     if (!xive_core_init(&xive_spapr_ops, tima, TM_QW1_OS, max_prio))
> +             return false;
> +
> +     pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
> +     return true;
> +}

Reply via email to