[tip:ras/core] x86/mce: Avoid potential deadlock due to printk() in MCE context
Commit-ID: f29a7aff4bd60ebc3da4982f80144a4158c4c74a Gitweb: http://git.kernel.org/tip/f29a7aff4bd60ebc3da4982f80144a4158c4c74a Author: Chen, Gong AuthorDate: Wed, 12 Aug 2015 18:29:37 +0200 Committer: Ingo Molnar CommitDate: Thu, 13 Aug 2015 10:12:51 +0200 x86/mce: Avoid potential deadlock due to printk() in MCE context Printing in MCE context is a no-no, currently, as printk() is not NMI-safe. If some of the notifiers on the MCE chain call do so, we may deadlock. In order to avoid that, delay printk() to process context where it is safe. Reported-by: Xie XiuQi Signed-off-by: Chen, Gong [ Fold in subsequent patch from Boris for early boot logging. ] Signed-off-by: Tony Luck [ Kick irq_work in mce_log() directly. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1439396985-12812-6-git-send-email...@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-apei.c | 1 - arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index a1aef95..34c89a3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) m.addr = mem_err->physical_addr; mce_log(&m); - mce_notify_irq(); } EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8260369..9568bb5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -159,7 +159,8 @@ void mce_log(struct mce *mce) /* Emit the trace record: */ trace_mce_record(mce); - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); + if (!mce_gen_pool_add(mce)) + irq_work_queue(&mce_irq_work); mce->finished = 0; wmb(); @@ -1122,7 +1123,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* assuming valid severity level != 0 */ m.severity = severity; m.usable_addr = mce_usable_address(&m); - mce_gen_pool_add(&m); mce_log(&m); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 844f56c..70f567f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -246,7 +246,6 @@ static void intel_threshold_interrupt(void) return; machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); - mce_notify_irq(); } /* -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:ras/core] x86/mce: Remove the MCE ring for Action Optional errors
Commit-ID: fd4cf79fcc4b5130ced8fd8c40378d3cec2e5fa8 Gitweb: http://git.kernel.org/tip/fd4cf79fcc4b5130ced8fd8c40378d3cec2e5fa8 Author: Chen, Gong AuthorDate: Wed, 12 Aug 2015 18:29:36 +0200 Committer: Ingo Molnar CommitDate: Thu, 13 Aug 2015 10:12:51 +0200 x86/mce: Remove the MCE ring for Action Optional errors Use unified genpool to save Action Optional error events and put Action Optional error handling in the same notification chain as MCE error decoding. Signed-off-by: Chen, Gong [ Fold in subsequent patch from Boris for early boot logging. ] Signed-off-by: Tony Luck [ Correct a lot. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1439396985-12812-5-git-send-email...@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 135 +-- drivers/acpi/acpi_extlog.c | 2 +- drivers/edac/i7core_edac.c | 2 +- drivers/edac/mce_amd.c | 2 +- drivers/edac/sb_edac.c | 2 +- 6 files changed, 65 insertions(+), 80 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 982dfc3..dfaa4de 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -140,7 +140,7 @@ struct mce_vendor_flags { extern struct mce_vendor_flags mce_flags; extern struct mca_config mca_cfg; -extern void mce_register_decode_chain(struct notifier_block *nb); +extern void mce_register_decode_chain(struct notifier_block *nb, bool drain); extern void mce_unregister_decode_chain(struct notifier_block *nb); #include diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 456f8d7..8260369 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -114,6 +114,7 @@ static struct work_struct mce_work; static struct irq_work mce_irq_work; static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); +static int mce_usable_address(struct mce *m); /* * CPU/chipset specific EDAC code can register a notifier call here to print @@ -234,11 +235,18 @@ static void drain_mcelog_buffer(void) } while (next != prev); } +static struct notifier_block mce_srao_nb; -void mce_register_decode_chain(struct notifier_block *nb) +void mce_register_decode_chain(struct notifier_block *nb, bool drain) { + /* Ensure SRAO notifier has the highest priority in the decode chain. */ + if (nb != &mce_srao_nb && nb->priority == INT_MAX) + nb->priority -= 1; + atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); - drain_mcelog_buffer(); + + if (drain) + drain_mcelog_buffer(); } EXPORT_SYMBOL_GPL(mce_register_decode_chain); @@ -462,61 +470,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) } } -/* - * Simple lockless ring to communicate PFNs from the exception handler with the - * process context work function. This is vastly simplified because there's - * only a single reader and a single writer. - */ -#define MCE_RING_SIZE 16 /* we use one entry less */ - -struct mce_ring { - unsigned short start; - unsigned short end; - unsigned long ring[MCE_RING_SIZE]; -}; -static DEFINE_PER_CPU(struct mce_ring, mce_ring); - -/* Runs with CPU affinity in workqueue */ -static int mce_ring_empty(void) -{ - struct mce_ring *r = this_cpu_ptr(&mce_ring); - - return r->start == r->end; -} - -static int mce_ring_get(unsigned long *pfn) -{ - struct mce_ring *r; - int ret = 0; - - *pfn = 0; - get_cpu(); - r = this_cpu_ptr(&mce_ring); - if (r->start == r->end) - goto out; - *pfn = r->ring[r->start]; - r->start = (r->start + 1) % MCE_RING_SIZE; - ret = 1; -out: - put_cpu(); - return ret; -} - -/* Always runs in MCE context with preempt off */ -static int mce_ring_add(unsigned long pfn) -{ - struct mce_ring *r = this_cpu_ptr(&mce_ring); - unsigned next; - - next = (r->end + 1) % MCE_RING_SIZE; - if (next == r->start) - return -1; - r->ring[r->end] = pfn; - wmb(); - r->end = next; - return 0; -} - int mce_available(struct cpuinfo_x86 *c) { if (mca_cfg.disabled) @@ -526,7 +479,7 @@ int mce_available(struct cpuinfo_x86 *c) static void mce_schedule_work(void) { - if (!mce_ring_empty()) + if (!mce_gen_pool_empty() && keventd_up()) schedule_work(&mce_work); } @@ -553,6 +506,27 @@ static void mce_report_event(struct pt_regs *regs) irq_work_queue(&mce_irq_work); } +static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *mce = (struct mce *)data; + unsigned long pfn; + + if (!mce) + return
[tip:ras/core] x86/mce: Provide a lockless memory pool to save error records
Commit-ID: 648ed94038c030245a06e4be59744fd5cdc18c40 Gitweb: http://git.kernel.org/tip/648ed94038c030245a06e4be59744fd5cdc18c40 Author: Chen, Gong AuthorDate: Wed, 12 Aug 2015 18:29:34 +0200 Committer: Ingo Molnar CommitDate: Thu, 13 Aug 2015 10:12:50 +0200 x86/mce: Provide a lockless memory pool to save error records printk() is not safe to use in MCE context. Add a lockless memory allocator pool to save error records in MCE context. Those records will be issued later, in a printk-safe context. The idea is inspired by the APEI/GHES driver. We're very conservative and allocate only two pages for it but since we're going to use those pages throughout the system's lifetime, we allocate them statically to avoid early boot time allocation woes. Signed-off-by: Chen, Gong [ Rewrite. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1439396985-12812-3-git-send-email...@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/mcheck/Makefile | 2 +- arch/x86/kernel/cpu/mcheck/mce-genpool.c | 99 +++ arch/x86/kernel/cpu/mcheck/mce-internal.h | 12 arch/x86/kernel/cpu/mcheck/mce.c | 8 ++- 5 files changed, 120 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b3a1a5d..06dbb5d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -955,6 +955,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS config X86_MCE bool "Machine Check / overheating reporting" + select GENERIC_ALLOCATOR default y ---help--- Machine Check support allows the processor to notify the diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index bb34b03..a3311c8 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,4 +1,4 @@ -obj-y = mce.o mce-severity.o +obj-y = mce.o mce-severity.o mce-genpool.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o obj-$(CONFIG_X86_MCE_INTEL)+= mce_intel.o diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c new file mode 100644 index 000..0a85010 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -0,0 +1,99 @@ +/* + * MCE event pool management in MCE context + * + * Copyright (C) 2015 Intel Corp. + * Author: Chen, Gong + * + * This file is licensed under GPLv2. + */ +#include +#include +#include +#include +#include "mce-internal.h" + +/* + * printk() is not safe in MCE context. This is a lock-less memory allocator + * used to save error information organized in a lock-less list. + * + * This memory pool is only to be used to save MCE records in MCE context. + * MCE events are rare, so a fixed size memory pool should be enough. Use + * 2 pages to save MCE events for now (~80 MCE records at most). + */ +#define MCE_POOLSZ (2 * PAGE_SIZE) + +static struct gen_pool *mce_evt_pool; +static LLIST_HEAD(mce_event_llist); +static char gen_pool_buf[MCE_POOLSZ]; + +void mce_gen_pool_process(void) +{ + struct llist_node *head; + struct mce_evt_llist *node; + struct mce *mce; + + head = llist_del_all(&mce_event_llist); + if (!head) + return; + + head = llist_reverse_order(head); + llist_for_each_entry(node, head, llnode) { + mce = &node->mce; + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); + gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); + } +} + +bool mce_gen_pool_empty(void) +{ + return llist_empty(&mce_event_llist); +} + +int mce_gen_pool_add(struct mce *mce) +{ + struct mce_evt_llist *node; + + if (!mce_evt_pool) + return -EINVAL; + + node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node)); + if (!node) { + pr_warn_ratelimited("MCE records pool full!\n"); + return -ENOMEM; + } + + memcpy(&node->mce, mce, sizeof(*mce)); + llist_add(&node->llnode, &mce_event_llist); + + return 0; +} + +static int mce_gen_pool_create(void) +{ + struct gen_pool *tmpp; + int ret = -ENOMEM; + + tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1); + if (!tmpp) + goto out; + + ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1); + if (ret) { + gen_pool_destroy(tmpp); + goto out; + } + + mce_evt_pool = tmpp; + +out: + return ret; +} + +int mce_gen_pool_init(void) +{ + /* Just init mce_gen_pool once. */ + if (mce_evt_pool) + return 0; + + return mce_gen_pool_create(); +} diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index fe32074
[tip:ras/core] x86/mce: Don't use percpu workqueues
Commit-ID: 061120aed7081b9a4393fbe07b558192f40ad911 Gitweb: http://git.kernel.org/tip/061120aed7081b9a4393fbe07b558192f40ad911 Author: Chen, Gong AuthorDate: Wed, 12 Aug 2015 18:29:35 +0200 Committer: Ingo Molnar CommitDate: Thu, 13 Aug 2015 10:12:51 +0200 x86/mce: Don't use percpu workqueues An MCE is a rare event. Therefore, there's no need to have per-CPU instances of both normal and IRQ workqueues. Make them both global. Signed-off-by: Chen, Gong [ Fold in subsequent patch from Rui/Boris/Tony for early boot logging. ] Signed-off-by: Tony Luck [ Massage commit message. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1439396985-12812-4-git-send-email...@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a41c014..456f8d7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -110,7 +110,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { */ mce_banks_t mce_banks_ce_disabled; -static DEFINE_PER_CPU(struct work_struct, mce_work); +static struct work_struct mce_work; +static struct irq_work mce_irq_work; static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); @@ -526,11 +527,9 @@ int mce_available(struct cpuinfo_x86 *c) static void mce_schedule_work(void) { if (!mce_ring_empty()) - schedule_work(this_cpu_ptr(&mce_work)); + schedule_work(&mce_work); } -static DEFINE_PER_CPU(struct irq_work, mce_irq_work); - static void mce_irq_work_cb(struct irq_work *entry) { mce_notify_irq(); @@ -551,7 +550,7 @@ static void mce_report_event(struct pt_regs *regs) return; } - irq_work_queue(this_cpu_ptr(&mce_irq_work)); + irq_work_queue(&mce_irq_work); } /* @@ -1742,8 +1741,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_timer(); - INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work); - init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb); } /* @@ -2064,6 +2061,9 @@ int __init mcheck_init(void) mcheck_intel_therm_init(); mcheck_vendor_init_severity(); + INIT_WORK(&mce_work, mce_process_work); + init_irq_work(&mce_irq_work, mce_irq_work_cb); + return 0; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/