Hi Thomas,

On Fri, 24 Jul 2020 21:44:41 +0100,
Thomas Gleixner <[email protected]> wrote:
> 
> John,
> 
> Thomas Gleixner <[email protected]> writes:
> > I have some ideas for a trivial generic way to solve this without
> > undoing the commit in question and without going through all the irq
> > chip drivers. So far everything I came up with is butt ugly. Maybe Marc
> > has some brilliant idea.
> >
> > Sorry for the wreckage and thanks for the excellent problem
> > description. I'll come back to you in the next days.
> 
> couldn't give up :)
> 
> So after staring in too many drivers, I resorted to make this mode
> opt-in and mark the interrupts accordingly for the two drivers which are
> known to want this. Not that I love it, but it's the least dangerous
> option. Completely untested patch below.
> 
> Thanks,
> 
>         tglx
> ---
>  arch/x86/kernel/apic/vector.c    |    4 ++++
>  drivers/irqchip/irq-gic-v3-its.c |    5 ++++-
>  include/linux/irq.h              |   13 +++++++++++++
>  kernel/irq/manage.c              |    6 +++++-
>  4 files changed, 26 insertions(+), 2 deletions(-)
> 
> --- a/arch/x86/kernel/apic/vector.c
> +++ b/arch/x86/kernel/apic/vector.c
> @@ -560,6 +560,10 @@ static int x86_vector_alloc_irqs(struct
>                * as that can corrupt the affinity move state.
>                */
>               irqd_set_handle_enforce_irqctx(irqd);
> +
> +             /* Don't invoke affinity setter on deactivated interrupts */
> +             irqd_set_affinity_on_activate(irqd);
> +
>               /*
>                * Legacy vectors are already assigned when the IOAPIC
>                * takes them over. They stay on the same vector. This is
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -3523,6 +3523,7 @@ static int its_irq_domain_alloc(struct i
>       msi_alloc_info_t *info = args;
>       struct its_device *its_dev = info->scratchpad[0].ptr;
>       struct its_node *its = its_dev->its;
> +     struct irq_data *irqd;
>       irq_hw_number_t hwirq;
>       int err;
>       int i;
> @@ -3542,7 +3543,9 @@ static int its_irq_domain_alloc(struct i
>  
>               irq_domain_set_hwirq_and_chip(domain, virq + i,
>                                             hwirq + i, &its_irq_chip, 
> its_dev);
> -             irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + 
> i)));
> +             irqd = irq_get_irq_data(virq + i);
> +             irqd_set_single_target(irqd);
> +             irqd_set_affinity_on_activate(irqd);
>               pr_debug("ID:%d pID:%d vID:%d\n",
>                        (int)(hwirq + i - its_dev->event_map.lpi_base),
>                        (int)(hwirq + i), virq + i);
> --- a/include/linux/irq.h
> +++ b/include/linux/irq.h
> @@ -213,6 +213,8 @@ struct irq_data {
>   *                             required
>   * IRQD_HANDLE_ENFORCE_IRQCTX        - Enforce that handle_irq_*() is only 
> invoked
>   *                             from actual interrupt context.
> + * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call
> + *                             irq_chip::irq_set_affinity() when deactivated.
>   */
>  enum {
>       IRQD_TRIGGER_MASK               = 0xf,
> @@ -237,6 +239,7 @@ enum {
>       IRQD_CAN_RESERVE                = (1 << 26),
>       IRQD_MSI_NOMASK_QUIRK           = (1 << 27),
>       IRQD_HANDLE_ENFORCE_IRQCTX      = (1 << 28),
> +     IRQD_AFFINITY_ON_ACTIVATE       = (1 << 29),
>  };
>  
>  #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
> @@ -421,6 +424,16 @@ static inline bool irqd_msi_nomask_quirk
>       return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK;
>  }
>  
> +static inline void irqd_set_affinity_on_activate(struct irq_data *d)
> +{
> +     __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE;
> +}
> +
> +static inline bool irqd_affinity_on_activate(struct irq_data *d)
> +{
> +     return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE;
> +}
> +
>  #undef __irqd_to_state
>  
>  static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -320,12 +320,16 @@ static bool irq_set_affinity_deactivated
>       struct irq_desc *desc = irq_data_to_desc(data);
>  
>       /*
> +      * Handle irq chips which can handle affinity only in activated
> +      * state correctly
> +      *
>        * If the interrupt is not yet activated, just store the affinity
>        * mask and do not call the chip driver at all. On activation the
>        * driver has to make sure anyway that the interrupt is in a
>        * useable state so startup works.
>        */
> -     if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data))
> +     if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) ||
> +         irqd_is_activated(data) || !irqd_affinity_on_activate(data))
>               return false;
>  
>       cpumask_copy(desc->irq_common_data.affinity, mask);
> 

I have given this a go on two systems: one with a GICv2 that has its
PMUs wired in a similar way to John's system (each CPU PMU is on a
separate SPI), and another one that has an ITS. Both came up normally
and their interrupts are routed as expected:

* GICv2 PMU:
30:  121    0    0    0    0    0    0    0 GICv2  39 Level arm-pmu
31:    0  167    0    0    0    0    0    0 GICv2  40 Level arm-pmu
32:    0    0  145    0    0    0    0    0 GICv2  41 Level arm-pmu
33:    0    0    0  400    0    0    0    0 GICv2  42 Level arm-pmu
34:    0    0    0    0   97    0    0    0 GICv2  43 Level arm-pmu
35:    0    0    0    0    0  463    0    0 GICv2  44 Level arm-pmu
36:    0    0    0    0    0    0   74    0 GICv2  45 Level arm-pmu
37:    0    0    0    0    0    0    0    8 GICv2  46 Level arm-pmu

* GICv3+ITS:
241:  219    0    0    0    0    0   ITS-MSI 524289 Edge nvme0q1
242:    0  251    0    0    0    0   ITS-MSI 524290 Edge nvme0q2
243:    0    0  197    0    0    0   ITS-MSI 524291 Edge nvme0q3
244:    0    0    0  380    0    0   ITS-MSI 524292 Edge nvme0q4
245:    0    0    0    0  675    0   ITS-MSI 524293 Edge nvme0q5
246:    0    0    0    0    0  436   ITS-MSI 524294 Edge nvme0q6

For a good measure, I've added this on top, adding the missing bits to
the debugfs entries:

diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 4f9f844074db..d44fc8a5dab2 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -120,6 +120,9 @@ static const struct irq_bit_descr irqdata_states[] = {
 
        BIT_MASK_DESCR(IRQD_WAKEUP_STATE),
        BIT_MASK_DESCR(IRQD_WAKEUP_ARMED),
+       BIT_MASK_DESCR(IRQD_DEFAULT_TRIGGER_SET),
+       BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX),
+       BIT_MASK_DESCR(IRQD_AFFINITY_ON_ACTIVATE),
 };
 
 static const struct irq_bit_descr irqdesc_states[] = {

FWIW:

Acked-by: Marc Zyngier <[email protected]>
Tested-by: Marc Zyngier <[email protected]>


        M.

-- 
Without deviation from the norm, progress is not possible.

Reply via email to