On Fri, Jul 28, 2017 at 09:09:33AM +0200, Borislav Petkov wrote:
> On Thu, Jul 27, 2017 at 09:47:08PM -0400, Steven Rostedt wrote:
> > What happens if two CPUs have MCEs at the same time? Wouldn't one
> > corrupt the other's buffer? 128 isn't too big to put on the stack, is it?
> 
> Yeah, putting it on the stack is probably safer, just in case.
> 
> What is even better, though, is if I extended
> arch/x86/kernel/cpu/mcheck/mce-genpool.c to allocate a second buffer for the
> decoded strings. We use it for the struct mces right now.

Here's a conversion to a genpool backed by two pages. Seems to work:

---
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 90eabb1e4a27..fd4a615200a8 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1,3 +1,4 @@
+#include <linux/genalloc.h>
 #include <linux/seq_buf.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -37,6 +38,16 @@ void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
 }
 EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
 
+/* 128 because, well, nice and round - two cachelines. */
+#define ELEM_ORDER     7
+#define ELEM_SIZE      (1 << 7)
+#define DEC_POOL_SIZE  (2 * PAGE_SIZE)
+
+static char __err_buf[DEC_POOL_SIZE];
+static struct gen_pool *dec_pool;
+
+static struct seq_buf sb;
+
 /*
  * string representation for the different MCA reported error types, see F3x48
  * or MSR0000_0411.
@@ -309,11 +320,6 @@ static struct smca_mce_desc smca_mce_descs[] = {
        [SMCA_SMU]      = { smca_smu_mce_desc,  ARRAY_SIZE(smca_smu_mce_desc)   },
 };
 
-/* 128 because, well, nice and round - two cachelines. */
-#define BUF_LEN        128
-static char __err_buf[BUF_LEN];
-static struct seq_buf sb;
-
 static bool f12h_mc0_mce(u16 ec, u8 xec)
 {
        bool ret = false;
@@ -1050,6 +1056,7 @@ static int
 amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 {
        struct mce *m = (struct mce *)data;
+       char *dec_buf;
 
        if (amd_filter_mce(m))
                return NOTIFY_STOP;
@@ -1057,8 +1064,15 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
        if (!ras_userspace_consumers())
                __decode_mce(m);
 
+       dec_buf = (void *)gen_pool_alloc(dec_pool, ELEM_SIZE);
+       if (!dec_buf) {
+               pr_warn_ratelimited("Decode buffer full!\n");
+               return NOTIFY_STOP;
+       }
+
        /* \0 terminated */
-       seq_buf_init(&sb, __err_buf, BUF_LEN);
+       seq_buf_init(&sb, dec_buf, ELEM_SIZE);
+       seq_buf_clear_buf(&sb);
 
        if (boot_cpu_has(X86_FEATURE_SMCA))
                decode_smca_error(m);
@@ -1074,12 +1088,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 
                while ((l = strsep(&sb.buffer, "\n")))
                        pr_emerg(HW_ERR "%s\n", l);
-
-               /* Restore original address because strsep() mangles it. */
-               sb.buffer = __err_buf;
        }
 
-       seq_buf_clear_buf(&sb);
+       gen_pool_free(dec_pool, (unsigned long)dec_buf, ELEM_SIZE);
 
        return NOTIFY_STOP;
 }
@@ -1092,6 +1103,7 @@ static struct notifier_block amd_mce_dec_nb = {
 static int __init mce_amd_init(void)
 {
        struct cpuinfo_x86 *c = &boot_cpu_data;
+       int ret;
 
        if (c->x86_vendor != X86_VENDOR_AMD)
                return -ENODEV;
@@ -1100,6 +1112,16 @@ static int __init mce_amd_init(void)
        if (!fam_ops)
                return -ENOMEM;
 
+       dec_pool = gen_pool_create(ELEM_ORDER, -1);
+       if (!dec_pool)
+               goto err_out;
+
+       ret = gen_pool_add(dec_pool, (unsigned long)__err_buf, DEC_POOL_SIZE, -1);
+       if (ret) {
+               gen_pool_destroy(dec_pool);
+               goto err_out;
+       }
+
        switch (c->x86) {
        case 0xf:
                fam_ops->mc0_mce = k8_mc0_mce;
@@ -1177,6 +1199,7 @@ static void __exit mce_amd_exit(void)
 {
        mce_unregister_decode_chain(&amd_mce_dec_nb);
        kfree(fam_ops);
+       gen_pool_destroy(dec_pool);
 }
 
 MODULE_DESCRIPTION("AMD MCE decoder");

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--

Reply via email to