[tip:ras/core] x86/mce: Avoid potential deadlock due to printk() in MCE context

2015-08-13 Thread tip-bot for Chen, Gong
Commit-ID:  f29a7aff4bd60ebc3da4982f80144a4158c4c74a
Gitweb: http://git.kernel.org/tip/f29a7aff4bd60ebc3da4982f80144a4158c4c74a
Author: Chen, Gong 
AuthorDate: Wed, 12 Aug 2015 18:29:37 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 13 Aug 2015 10:12:51 +0200

x86/mce: Avoid potential deadlock due to printk() in MCE context

Printing in MCE context is a no-no, currently, as printk() is
not NMI-safe. If any of the notifiers on the MCE chain do so,
we may deadlock. In order to avoid that, delay printk() to
process context where it is safe.

Reported-by: Xie XiuQi 
Signed-off-by: Chen, Gong 
[ Fold in subsequent patch from Boris for early boot logging. ]
Signed-off-by: Tony Luck 
[ Kick irq_work in mce_log() directly. ]
Signed-off-by: Borislav Petkov 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/1439396985-12812-6-git-send-email...@alien8.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/cpu/mcheck/mce-apei.c  | 1 -
 arch/x86/kernel/cpu/mcheck/mce.c   | 4 ++--
 arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 -
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index a1aef95..34c89a3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 
m.addr = mem_err->physical_addr;
mce_log(&m);
-   mce_notify_irq();
 }
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8260369..9568bb5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -159,7 +159,8 @@ void mce_log(struct mce *mce)
/* Emit the trace record: */
trace_mce_record(mce);
 
-   atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+   if (!mce_gen_pool_add(mce))
+   irq_work_queue(&mce_irq_work);
 
mce->finished = 0;
wmb();
@@ -1122,7 +1123,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
/* assuming valid severity level != 0 */
m.severity = severity;
m.usable_addr = mce_usable_address(&m);
-   mce_gen_pool_add(&m);
 
mce_log(&m);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 844f56c..70f567f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -246,7 +246,6 @@ static void intel_threshold_interrupt(void)
return;
 
machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
-   mce_notify_irq();
 }
 
 /*
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:ras/core] x86/mce: Remove the MCE ring for Action Optional errors

2015-08-13 Thread tip-bot for Chen, Gong
Commit-ID:  fd4cf79fcc4b5130ced8fd8c40378d3cec2e5fa8
Gitweb: http://git.kernel.org/tip/fd4cf79fcc4b5130ced8fd8c40378d3cec2e5fa8
Author: Chen, Gong 
AuthorDate: Wed, 12 Aug 2015 18:29:36 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 13 Aug 2015 10:12:51 +0200

x86/mce: Remove the MCE ring for Action Optional errors

Use the unified genpool to save Action Optional error events and
put Action Optional error handling in the same notification chain
as MCE error decoding.

Signed-off-by: Chen, Gong 
[ Fold in subsequent patch from Boris for early boot logging. ]
Signed-off-by: Tony Luck 
[ Correct a lot. ]
Signed-off-by: Borislav Petkov 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/1439396985-12812-5-git-send-email...@alien8.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/mce.h   |   2 +-
 arch/x86/kernel/cpu/mcheck/mce.c | 135 +--
 drivers/acpi/acpi_extlog.c   |   2 +-
 drivers/edac/i7core_edac.c   |   2 +-
 drivers/edac/mce_amd.c   |   2 +-
 drivers/edac/sb_edac.c   |   2 +-
 6 files changed, 65 insertions(+), 80 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 982dfc3..dfaa4de 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -140,7 +140,7 @@ struct mce_vendor_flags {
 extern struct mce_vendor_flags mce_flags;
 
 extern struct mca_config mca_cfg;
-extern void mce_register_decode_chain(struct notifier_block *nb);
+extern void mce_register_decode_chain(struct notifier_block *nb, bool drain);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
 #include 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 456f8d7..8260369 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -114,6 +114,7 @@ static struct work_struct mce_work;
 static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+static int mce_usable_address(struct mce *m);
 
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
@@ -234,11 +235,18 @@ static void drain_mcelog_buffer(void)
} while (next != prev);
 }
 
+static struct notifier_block mce_srao_nb;
 
-void mce_register_decode_chain(struct notifier_block *nb)
+void mce_register_decode_chain(struct notifier_block *nb, bool drain)
 {
+   /* Ensure SRAO notifier has the highest priority in the decode chain. */
+   if (nb != &mce_srao_nb && nb->priority == INT_MAX)
+   nb->priority -= 1;
+
atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
-   drain_mcelog_buffer();
+
+   if (drain)
+   drain_mcelog_buffer();
 }
 EXPORT_SYMBOL_GPL(mce_register_decode_chain);
 
@@ -462,61 +470,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
}
 }
 
-/*
- * Simple lockless ring to communicate PFNs from the exception handler with the
- * process context work function. This is vastly simplified because there's
- * only a single reader and a single writer.
- */
-#define MCE_RING_SIZE 16   /* we use one entry less */
-
-struct mce_ring {
-   unsigned short start;
-   unsigned short end;
-   unsigned long ring[MCE_RING_SIZE];
-};
-static DEFINE_PER_CPU(struct mce_ring, mce_ring);
-
-/* Runs with CPU affinity in workqueue */
-static int mce_ring_empty(void)
-{
-   struct mce_ring *r = this_cpu_ptr(&mce_ring);
-
-   return r->start == r->end;
-}
-
-static int mce_ring_get(unsigned long *pfn)
-{
-   struct mce_ring *r;
-   int ret = 0;
-
-   *pfn = 0;
-   get_cpu();
-   r = this_cpu_ptr(&mce_ring);
-   if (r->start == r->end)
-   goto out;
-   *pfn = r->ring[r->start];
-   r->start = (r->start + 1) % MCE_RING_SIZE;
-   ret = 1;
-out:
-   put_cpu();
-   return ret;
-}
-
-/* Always runs in MCE context with preempt off */
-static int mce_ring_add(unsigned long pfn)
-{
-   struct mce_ring *r = this_cpu_ptr(&mce_ring);
-   unsigned next;
-
-   next = (r->end + 1) % MCE_RING_SIZE;
-   if (next == r->start)
-   return -1;
-   r->ring[r->end] = pfn;
-   wmb();
-   r->end = next;
-   return 0;
-}
-
 int mce_available(struct cpuinfo_x86 *c)
 {
if (mca_cfg.disabled)
@@ -526,7 +479,7 @@ int mce_available(struct cpuinfo_x86 *c)
 
 static void mce_schedule_work(void)
 {
-   if (!mce_ring_empty())
+   if (!mce_gen_pool_empty() && keventd_up())
schedule_work(&mce_work);
 }
 
@@ -553,6 +506,27 @@ static void mce_report_event(struct pt_regs *regs)
irq_work_queue(&mce_irq_work);
 }
 
+static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
+   void *data)
+{
+   struct mce *mce = (struct mce *)data;
+   unsigned long pfn;
+
+   if (!mce)
+   return 

[tip:ras/core] x86/mce: Provide a lockless memory pool to save error records

2015-08-13 Thread tip-bot for Chen, Gong
Commit-ID:  648ed94038c030245a06e4be59744fd5cdc18c40
Gitweb: http://git.kernel.org/tip/648ed94038c030245a06e4be59744fd5cdc18c40
Author: Chen, Gong 
AuthorDate: Wed, 12 Aug 2015 18:29:34 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 13 Aug 2015 10:12:50 +0200

x86/mce: Provide a lockless memory pool to save error records

printk() is not safe to use in MCE context. Add a lockless
memory allocator pool to save error records in MCE context.
Those records will be issued later, in a printk-safe context.
The idea is inspired by the APEI/GHES driver.

We're very conservative and allocate only two pages for it but
since we're going to use those pages throughout the system's
lifetime, we allocate them statically to avoid early boot time
allocation woes.

Signed-off-by: Chen, Gong 
[ Rewrite. ]
Signed-off-by: Borislav Petkov 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Tony Luck 
Link: http://lkml.kernel.org/r/1439396985-12812-3-git-send-email...@alien8.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/Kconfig  |  1 +
 arch/x86/kernel/cpu/mcheck/Makefile   |  2 +-
 arch/x86/kernel/cpu/mcheck/mce-genpool.c  | 99 +++
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 12 
 arch/x86/kernel/cpu/mcheck/mce.c  |  8 ++-
 5 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b3a1a5d..06dbb5d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -955,6 +955,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 
 config X86_MCE
bool "Machine Check / overheating reporting"
+   select GENERIC_ALLOCATOR
default y
---help---
  Machine Check support allows the processor to notify the
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index bb34b03..a3311c8 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,4 +1,4 @@
-obj-y  =  mce.o mce-severity.o
+obj-y  =  mce.o mce-severity.o mce-genpool.o
 
 obj-$(CONFIG_X86_ANCIENT_MCE)  += winchip.o p5.o
 obj-$(CONFIG_X86_MCE_INTEL)+= mce_intel.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
new file mode 100644
index 0000000..0a85010
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -0,0 +1,99 @@
+/*
+ * MCE event pool management in MCE context
+ *
+ * Copyright (C) 2015 Intel Corp.
+ * Author: Chen, Gong 
+ *
+ * This file is licensed under GPLv2.
+ */
+#include 
+#include 
+#include 
+#include 
+#include "mce-internal.h"
+
+/*
+ * printk() is not safe in MCE context. This is a lock-less memory allocator
+ * used to save error information organized in a lock-less list.
+ *
+ * This memory pool is only to be used to save MCE records in MCE context.
+ * MCE events are rare, so a fixed size memory pool should be enough. Use
+ * 2 pages to save MCE events for now (~80 MCE records at most).
+ */
+#define MCE_POOLSZ (2 * PAGE_SIZE)
+
+static struct gen_pool *mce_evt_pool;
+static LLIST_HEAD(mce_event_llist);
+static char gen_pool_buf[MCE_POOLSZ];
+
+void mce_gen_pool_process(void)
+{
+   struct llist_node *head;
+   struct mce_evt_llist *node;
+   struct mce *mce;
+
+   head = llist_del_all(&mce_event_llist);
+   if (!head)
+   return;
+
+   head = llist_reverse_order(head);
+   llist_for_each_entry(node, head, llnode) {
+   mce = &node->mce;
+   atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+   gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
+   }
+}
+
+bool mce_gen_pool_empty(void)
+{
+   return llist_empty(&mce_event_llist);
+}
+
+int mce_gen_pool_add(struct mce *mce)
+{
+   struct mce_evt_llist *node;
+
+   if (!mce_evt_pool)
+   return -EINVAL;
+
+   node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node));
+   if (!node) {
+   pr_warn_ratelimited("MCE records pool full!\n");
+   return -ENOMEM;
+   }
+
+   memcpy(&node->mce, mce, sizeof(*mce));
+   llist_add(&node->llnode, &mce_event_llist);
+
+   return 0;
+}
+
+static int mce_gen_pool_create(void)
+{
+   struct gen_pool *tmpp;
+   int ret = -ENOMEM;
+
+   tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1);
+   if (!tmpp)
+   goto out;
+
+   ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1);
+   if (ret) {
+   gen_pool_destroy(tmpp);
+   goto out;
+   }
+
+   mce_evt_pool = tmpp;
+
+out:
+   return ret;
+}
+
+int mce_gen_pool_init(void)
+{
+   /* Just init mce_gen_pool once. */
+   if (mce_evt_pool)
+   return 0;
+
+   return mce_gen_pool_create();
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h 
b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index fe32074

[tip:ras/core] x86/mce: Don't use percpu workqueues

2015-08-13 Thread tip-bot for Chen, Gong
Commit-ID:  061120aed7081b9a4393fbe07b558192f40ad911
Gitweb: http://git.kernel.org/tip/061120aed7081b9a4393fbe07b558192f40ad911
Author: Chen, Gong 
AuthorDate: Wed, 12 Aug 2015 18:29:35 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 13 Aug 2015 10:12:51 +0200

x86/mce: Don't use percpu workqueues

An MCE is a rare event. Therefore, there's no need to have
per-CPU instances of both normal and IRQ workqueues. Make them
both global.

Signed-off-by: Chen, Gong 
[ Fold in subsequent patch from Rui/Boris/Tony for early boot logging. ]
Signed-off-by: Tony Luck 
[ Massage commit message. ]
Signed-off-by: Borislav Petkov 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/1439396985-12812-4-git-send-email...@alien8.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/cpu/mcheck/mce.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a41c014..456f8d7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -110,7 +110,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
  */
 mce_banks_t mce_banks_ce_disabled;
 
-static DEFINE_PER_CPU(struct work_struct, mce_work);
+static struct work_struct mce_work;
+static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
 
@@ -526,11 +527,9 @@ int mce_available(struct cpuinfo_x86 *c)
 static void mce_schedule_work(void)
 {
if (!mce_ring_empty())
-   schedule_work(this_cpu_ptr(&mce_work));
+   schedule_work(&mce_work);
 }
 
-static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
-
 static void mce_irq_work_cb(struct irq_work *entry)
 {
mce_notify_irq();
@@ -551,7 +550,7 @@ static void mce_report_event(struct pt_regs *regs)
return;
}
 
-   irq_work_queue(this_cpu_ptr(&mce_irq_work));
+   irq_work_queue(&mce_irq_work);
 }
 
 /*
@@ -1742,8 +1741,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_timer();
-   INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
-   init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
 }
 
 /*
@@ -2064,6 +2061,9 @@ int __init mcheck_init(void)
mcheck_intel_therm_init();
mcheck_vendor_init_severity();
 
+   INIT_WORK(&mce_work, mce_process_work);
+   init_irq_work(&mce_irq_work, mce_irq_work_cb);
+
return 0;
 }
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/