I noticed when debugging a perf problem on a machine with GHES enabled,
perf seemed slow.  I then realized that the GHES NMI routine was taking
a global lock all the time to inspect the hardware.  This contended
with all the local perf counters which did not need a lock.  So each cpu
accidentally was synchronizing with itself when using perf.

This is because the way the nmi handler works.  It executes all the handlers
registered to a particular subtype (to deal with nmi sharing).  As a result
the GHES handler was executed on every PMI.

Fix this by creating a new nmi type called NMI_EXT, which is used by
handlers that need to probe external hardware and require a global lock
to do so.

Now the main NMI handler can check the internal NMI handlers first and
then the external ones if nothing is found.

This makes perf a little faster again on those machines with GHES enabled.

Signed-off-by: Don Zickus <dzic...@redhat.com>
---
 arch/x86/include/asm/nmi.h |    1 +
 arch/x86/kernel/nmi.c      |   41 ++++++++++++++++++++++++++---------------
 drivers/acpi/apei/ghes.c   |    4 ++--
 drivers/watchdog/hpwdt.c   |   10 ++--------
 4 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 5f2fc44..8631a49 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -25,6 +25,7 @@ extern int unknown_nmi_panic;
 
 enum {
        NMI_LOCAL=0,
+       NMI_EXT,
        NMI_UNKNOWN,
        NMI_SERR,
        NMI_IO_CHECK,
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 7b17864..b7c6f6b 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -34,29 +34,32 @@
 #include <trace/events/nmi.h>
 
 struct nmi_desc {
-       spinlock_t lock;
-       struct list_head head;
+       spinlock_t              lock;
+       struct list_head        head;
 };
 
 static struct nmi_desc nmi_desc[NMI_MAX] = 
 {
-       {
-               .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
-               .head = LIST_HEAD_INIT(nmi_desc[0].head),
+       [NMI_LOCAL] = {
+               .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[NMI_LOCAL].lock),
+               .head = LIST_HEAD_INIT(nmi_desc[NMI_LOCAL].head),
        },
-       {
-               .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
-               .head = LIST_HEAD_INIT(nmi_desc[1].head),
+       [NMI_EXT] = {
+               .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[NMI_EXT].lock),
+               .head = LIST_HEAD_INIT(nmi_desc[NMI_EXT].head),
        },
-       {
-               .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
-               .head = LIST_HEAD_INIT(nmi_desc[2].head),
+       [NMI_UNKNOWN] = {
+               .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[NMI_UNKNOWN].lock),
+               .head = LIST_HEAD_INIT(nmi_desc[NMI_UNKNOWN].head),
        },
-       {
-               .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
-               .head = LIST_HEAD_INIT(nmi_desc[3].head),
+       [NMI_SERR] = {
+               .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[NMI_SERR].lock),
+               .head = LIST_HEAD_INIT(nmi_desc[NMI_SERR].head),
+       },
+       [NMI_IO_CHECK] = {
+               .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[NMI_IO_CHECK].lock),
+               .head = LIST_HEAD_INIT(nmi_desc[NMI_IO_CHECK].head),
        },
-
 };
 
 struct nmi_stats {
@@ -428,8 +431,16 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
 
        __this_cpu_write(last_nmi_rip, regs->ip);
 
+       /* try local NMIs first */
        handled = nmi_handle(NMI_LOCAL, regs, b2b);
        __this_cpu_add(nmi_stats.normal, handled);
+
+       /* if unclaimed, try external NMIs next */
+       if (!handled) {
+               handled = nmi_handle(NMI_EXT, regs, b2b);
+               __this_cpu_add(nmi_stats.external, handled);
+       }
+
        if (handled) {
                /*
                 * There are cases when a NMI handler handles multiple
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index dab7cb7..8b78bf5 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -976,7 +976,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
                ghes_estatus_pool_expand(len);
                mutex_lock(&ghes_list_mutex);
                if (list_empty(&ghes_nmi))
-                       register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0,
+                       register_nmi_handler(NMI_EXT, ghes_notify_nmi, 0,
                                                "ghes");
                list_add_rcu(&ghes->list, &ghes_nmi);
                mutex_unlock(&ghes_list_mutex);
@@ -1025,7 +1025,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
                mutex_lock(&ghes_list_mutex);
                list_del_rcu(&ghes->list);
                if (list_empty(&ghes_nmi))
-                       unregister_nmi_handler(NMI_LOCAL, "ghes");
+                       unregister_nmi_handler(NMI_EXT, "ghes");
                mutex_unlock(&ghes_list_mutex);
                /*
                 * To synchronize with NMI handler, ghes can only be
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 75d2243..dd70ee7 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -736,12 +736,9 @@ static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
        retval = register_nmi_handler(NMI_UNKNOWN, hpwdt_pretimeout, 0, 
"hpwdt");
        if (retval)
                goto error;
-       retval = register_nmi_handler(NMI_SERR, hpwdt_pretimeout, 0, "hpwdt");
+       retval = register_nmi_handler(NMI_EXT, hpwdt_pretimeout, 0, "hpwdt");
        if (retval)
                goto error1;
-       retval = register_nmi_handler(NMI_IO_CHECK, hpwdt_pretimeout, 0, 
"hpwdt");
-       if (retval)
-               goto error2;
 
        dev_info(&dev->dev,
                        "HP Watchdog Timer Driver: NMI decoding initialized"
@@ -749,8 +746,6 @@ static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
                        (allow_kdump == 0) ? "OFF" : "ON");
        return 0;
 
-error2:
-       unregister_nmi_handler(NMI_SERR, "hpwdt");
 error1:
        unregister_nmi_handler(NMI_UNKNOWN, "hpwdt");
 error:
@@ -765,8 +760,7 @@ error:
 static void hpwdt_exit_nmi_decoding(void)
 {
        unregister_nmi_handler(NMI_UNKNOWN, "hpwdt");
-       unregister_nmi_handler(NMI_SERR, "hpwdt");
-       unregister_nmi_handler(NMI_IO_CHECK, "hpwdt");
+       unregister_nmi_handler(NMI_EXT, "hpwdt");
        if (cru_rom_addr)
                iounmap(cru_rom_addr);
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to