Hi,

On Fri, 17 Jul 2020 18:00:02 +0200
Thomas Gleixner <t...@linutronix.de> wrote:

> Setting interrupt affinity on inactive interrupts is inconsistent when
> hierarchical irq domains are enabled. The core code should just store the
> affinity and not call into the irq chip driver for inactive interrupts
> because the chip drivers may not be in a state to handle such requests.
> 
> X86 has a hacky workaround for that, but all other irq chips do not, which
> causes problems e.g. on GIC V3 ITS.
> 
> Instead of adding more ugly hacks all over the place, solve the problem in
> the core code. If the affinity is set on an inactive interrupt then:
> 
>     - Store it in the irq descriptors affinity mask
>     - Update the effective affinity to reflect that so user space has
>       a consistent view
>     - Don't call into the irq chip driver
> 
> This is the core equivalent of the X86 workaround and works correctly
> because the affinity setting is established in the irq chip when the
> interrupt is activated later on.
> 
> Note that this is only effective when hierarchical irq domains are enabled
> by the architecture. Doing it unconditionally would break legacy irq chip
> implementations.
> 
> For hierarchical irq domains this works correctly, as none of the drivers
> can have a dependency on affinity setting in inactive state by design.
> 
> Remove the X86 workaround as it is no longer required.
> 
> Fixes: 02edee152d6e ("x86/apic/vector: Ignore set_affinity call for inactive interrupts")
> Reported-by: Ali Saidi <alisa...@amazon.com>
> Signed-off-by: Thomas Gleixner <t...@linutronix.de>
> Cc: sta...@vger.kernel.org
> Link: https://lore.kernel.org/r/20200529015501.15771-1-alisa...@amazon.com
> ---
> V2: Fix the fallout for CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=n (0day)

It seems that this patch breaks perf events on RK3288: the PMU
interrupts, which should be per-CPU, now all land on CPU0, so no events
are collected from CPUs 1-3 and those interrupts are killed as spurious
after a few seconds.

I'm seeing this on 4.19.134 and 5.4.53, but as far as I can tell the
relevant code hasn't changed through to next-20200723.  Reverting the
backport of this change fixes the problem.

It looks like what happens is this: because the interrupts are not
per-CPU in the hardware, armpmu_request_irq() calls irq_force_affinity()
while the interrupt is deactivated, and then calls request_irq() with
IRQF_PERCPU | IRQF_NOBALANCING (sketched below).
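
For reference, the relevant part of armpmu_request_irq() looks roughly
like this (paraphrased from memory of v5.4's drivers/perf/arm_pmu.c,
error handling trimmed, so treat the details as approximate):

	if (!irq_is_percpu_devid(irq)) {
		/*
		 * The interrupt is still deactivated here, so with the
		 * patch above irq_force_affinity() only stores the mask
		 * and never reaches the irq chip.
		 */
		err = irq_force_affinity(irq, cpumask_of(cpu));

		irq_set_status_flags(irq, IRQ_NOAUTOEN);
		err = request_irq(irq, armpmu_dispatch_irq,
				  IRQF_PERCPU | IRQF_NOBALANCING |
				  IRQF_NO_THREAD,
				  "arm-pmu", per_cpu_ptr(&cpu_armpmu, cpu));
	}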

Now when irq_startup() runs with IRQ_STARTUP_NORMAL, it calls
irq_setup_affinity(), which returns early because IRQF_PERCPU and
IRQF_NOBALANCING are set, leaving the interrupt on its original CPU.
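
If I'm reading it right, the early return is this one (paraphrased from
kernel/irq/manage.c; __irq_can_set_affinity() fails because
IRQD_PER_CPU and IRQD_NO_BALANCING were set from the request flags):

	int irq_setup_affinity(struct irq_desc *desc)
	{
		...
		/* Excludes PER_CPU and NO_BALANCE interrupts */
		if (!__irq_can_set_affinity(desc))
			return 0;
		...
	}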

At this point /proc/interrupts clearly shows the interrupts occurring on
CPU0 despite /proc/irq/N/effective_affinity and /proc/irq/N/smp_affinity
showing them spread across the cores as expected.

I don't think I understand what's meant to happen well enough to propose
a patch, but hopefully the above makes the problem clear.


Regards,
John

> ---
>  arch/x86/kernel/apic/vector.c |   22 +++++-----------------
>  kernel/irq/manage.c           |   37 +++++++++++++++++++++++++++++++++++--
>  2 files changed, 40 insertions(+), 19 deletions(-)
> 
> --- a/arch/x86/kernel/apic/vector.c
> +++ b/arch/x86/kernel/apic/vector.c
> @@ -446,12 +446,10 @@ static int x86_vector_activate(struct ir
>       trace_vector_activate(irqd->irq, apicd->is_managed,
>                             apicd->can_reserve, reserve);
>  
> -     /* Nothing to do for fixed assigned vectors */
> -     if (!apicd->can_reserve && !apicd->is_managed)
> -             return 0;
> -
>       raw_spin_lock_irqsave(&vector_lock, flags);
> -     if (reserve || irqd_is_managed_and_shutdown(irqd))
> +     if (!apicd->can_reserve && !apicd->is_managed)
> +             assign_irq_vector_any_locked(irqd);
> +     else if (reserve || irqd_is_managed_and_shutdown(irqd))
>               vector_assign_managed_shutdown(irqd);
>       else if (apicd->is_managed)
>               ret = activate_managed(irqd);
> @@ -775,20 +773,10 @@ void lapic_offline(void)
>  static int apic_set_affinity(struct irq_data *irqd,
>                            const struct cpumask *dest, bool force)
>  {
> -     struct apic_chip_data *apicd = apic_chip_data(irqd);
>       int err;
>  
> -     /*
> -      * Core code can call here for inactive interrupts. For inactive
> -      * interrupts which use managed or reservation mode there is no
> -      * point in going through the vector assignment right now as the
> -      * activation will assign a vector which fits the destination
> -      * cpumask. Let the core code store the destination mask and be
> -      * done with it.
> -      */
> -     if (!irqd_is_activated(irqd) &&
> -         (apicd->is_managed || apicd->can_reserve))
> -             return IRQ_SET_MASK_OK;
> +     if (WARN_ON_ONCE(!irqd_is_activated(irqd)))
> +             return -EIO;
>  
>       raw_spin_lock(&vector_lock);
>       cpumask_and(vector_searchmask, dest, cpu_online_mask);
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -195,9 +195,9 @@ void irq_set_thread_affinity(struct irq_
>                       set_bit(IRQTF_AFFINITY, &action->thread_flags);
>  }
>  
> +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
>  static void irq_validate_effective_affinity(struct irq_data *data)
>  {
> -#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
>       const struct cpumask *m = irq_data_get_effective_affinity_mask(data);
>       struct irq_chip *chip = irq_data_get_irq_chip(data);
>  
> @@ -205,9 +205,19 @@ static void irq_validate_effective_affin
>               return;
>      pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
>                    chip->name, data->irq);
> -#endif
>  }
>  
> +static inline void irq_init_effective_affinity(struct irq_data *data,
> +                                            const struct cpumask *mask)
> +{
> +     cpumask_copy(irq_data_get_effective_affinity_mask(data), mask);
> +}
> +#else
> +static inline void irq_validate_effective_affinity(struct irq_data *data) { }
> +static inline void irq_init_effective_affinity(struct irq_data *data,
> +                                            const struct cpumask *mask) { }
> +#endif
> +
>  int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
>                       bool force)
>  {
> @@ -304,6 +314,26 @@ static int irq_try_set_affinity(struct i
>       return ret;
>  }
>  
> +static bool irq_set_affinity_deactivated(struct irq_data *data,
> +                                      const struct cpumask *mask, bool force)
> +{
> +     struct irq_desc *desc = irq_data_to_desc(data);
> +
> +     /*
> +      * If the interrupt is not yet activated, just store the affinity
> +      * mask and do not call the chip driver at all. On activation the
> +      * driver has to make sure anyway that the interrupt is in a
> +      * useable state so startup works.
> +      */
> +     if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data))
> +             return false;
> +
> +     cpumask_copy(desc->irq_common_data.affinity, mask);
> +     irq_init_effective_affinity(data, mask);
> +     irqd_set(data, IRQD_AFFINITY_SET);
> +     return true;
> +}
> +
>  int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
>                           bool force)
>  {
> @@ -314,6 +344,9 @@ int irq_set_affinity_locked(struct irq_d
>       if (!chip || !chip->irq_set_affinity)
>               return -EINVAL;
>  
> +     if (irq_set_affinity_deactivated(data, mask, force))
> +             return 0;
> +
>       if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) {
>               ret = irq_try_set_affinity(data, mask, force);
>       } else {
