Add a separate local APIC NMI controller to handle NMIs apart from the
regular APIC management.

This controller will be used to handle the NMI vector of the HPET NMI
watchdog.

Cc: Andi Kleen <a...@linux.intel.com>
Cc: "Ravi V. Shankar" <ravi.v.shan...@intel.com>
Cc: Stephane Eranian <eran...@google.com>
Cc: io...@lists.linux-foundation.org
Cc: linuxppc-dev@lists.ozlabs.org
Suggested-by: Thomas Gleixner <t...@linutronix.de>
Signed-off-by: Ricardo Neri <ricardo.neri-calde...@linux.intel.com>
---
Changes since v6:
 * Reworked patch "x86/apic/vector: Implement support for NMI delivery
   mode" into a separate local APIC NMI controller. (Thomas)

Changes since v5:
 * Introduced this patch.

Changes since v4:
 * N/A

Changes since v3:
 * N/A

Changes since v2:
 * N/A

Changes since v1:
 * N/A
---
 arch/x86/kernel/apic/vector.c | 57 +++++++++++++++++++++++++++++++++++
 include/linux/irq.h           |  5 +++
 2 files changed, 62 insertions(+)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 633b442c8f84..a4cf041427cb 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -42,6 +42,7 @@ EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
 static cpumask_var_t vector_searchmask;
 static struct irq_chip lapic_controller;
+static struct irq_chip lapic_nmi_controller;
 static struct irq_matrix *vector_matrix;
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
@@ -451,6 +452,10 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
        trace_vector_activate(irqd->irq, apicd->is_managed,
                              apicd->can_reserve, reserve);
 
+       /* NMI has a fixed vector. No vector management required */
+       if (apicd->hw_irq_cfg.delivery_mode == APIC_DELIVERY_MODE_NMI)
+               return 0;
+
        raw_spin_lock_irqsave(&vector_lock, flags);
        if (!apicd->can_reserve && !apicd->is_managed)
                assign_irq_vector_any_locked(irqd);
@@ -472,6 +477,10 @@ static void vector_free_reserved_and_managed(struct irq_data *irqd)
        trace_vector_teardown(irqd->irq, apicd->is_managed,
                              apicd->has_reserved);
 
+       /* NMI has a fixed vector. No vector management required */
+       if (apicd->hw_irq_cfg.delivery_mode == APIC_DELIVERY_MODE_NMI)
+               return;
+
        if (apicd->has_reserved)
                irq_matrix_remove_reserved(vector_matrix);
        if (apicd->is_managed)
@@ -539,6 +548,10 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
        if (disable_apic)
                return -ENXIO;
 
+       /* Only one IRQ per NMI */
+       if ((info->flags & X86_IRQ_ALLOC_AS_NMI) && nr_irqs != 1)
+               return -EINVAL;
+
        /*
         * Catch any attempt to touch the cascade interrupt on a PIC
         * equipped system.
@@ -573,6 +586,25 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
                /* Don't invoke affinity setter on deactivated interrupts */
                irqd_set_affinity_on_activate(irqd);
 
+               if (info->flags & X86_IRQ_ALLOC_AS_NMI) {
+                       /*
+                        * NMIs have a fixed vector and need their own
+                        * interrupt chip so nothing can end up in the
+                        * regular local APIC management code except the
+                        * MSI message composing callback.
+                        */
+                       apicd->hw_irq_cfg.delivery_mode = APIC_DELIVERY_MODE_NMI;
+                       irqd->chip = &lapic_nmi_controller;
+                       /*
+                        * Exclude NMIs from balancing. This cannot work with
+                        * the regular affinity mechanisms. The local APIC NMI
+                        * controller provides a set_affinity() callback for the
+                        * intended HPET NMI watchdog use case.
+                        */
+                       irqd_set_no_balance(irqd);
+                       return 0;
+               }
+
                /*
                 * A delivery mode may be specified in the interrupt allocation
                 * info. If not, use the delivery mode of the APIC.
@@ -872,8 +904,27 @@ static int apic_set_affinity(struct irq_data *irqd,
        return err ? err : IRQ_SET_MASK_OK;
 }
 
+static int apic_nmi_set_affinity(struct irq_data *irqd,
+                                const struct cpumask *dest, bool force)
+{
+       struct apic_chip_data *apicd = apic_chip_data(irqd);
+       static struct cpumask tmp_mask;
+       int cpu;
+
+       cpumask_and(&tmp_mask, dest, cpu_online_mask);
+       if (cpumask_empty(&tmp_mask))
+               return -ENODEV;
+
+       cpu = cpumask_first(&tmp_mask);
+       apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu);
+       irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
+
+       return IRQ_SET_MASK_OK;
+}
+
 #else
 # define apic_set_affinity     NULL
+# define apic_nmi_set_affinity NULL
 #endif
 
 static int apic_retrigger_irq(struct irq_data *irqd)
@@ -914,6 +965,12 @@ static struct irq_chip lapic_controller = {
        .irq_retrigger          = apic_retrigger_irq,
 };
 
+static struct irq_chip lapic_nmi_controller = {
+       .name                   = "APIC-NMI",
+       .irq_set_affinity       = apic_nmi_set_affinity,
+       .irq_compose_msi_msg    = x86_vector_msi_compose_msg,
+};
+
 #ifdef CONFIG_SMP
 
 static void free_moved_vector(struct apic_chip_data *apicd)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index b1b28affb32a..c8738b36e316 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -263,6 +263,11 @@ static inline bool irqd_is_per_cpu(struct irq_data *d)
        return __irqd_to_state(d) & IRQD_PER_CPU;
 }
 
+static inline void irqd_set_no_balance(struct irq_data *d)
+{
+       __irqd_to_state(d) |= IRQD_NO_BALANCING;
+}
+
 static inline bool irqd_can_balance(struct irq_data *d)
 {
        return !(__irqd_to_state(d) & (IRQD_PER_CPU | IRQD_NO_BALANCING));
-- 
2.25.1

Reply via email to