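Currently memory_failure() assumes that it is invoked infrequently, for
reports covering only a handful of pages. A new use case, surprise
removal of a persistent memory device, needs to trigger memory_failure()
across a large range of pages. Rate-limit the memory_failure() error
logging so that such a range does not flood the kernel log, and allow
the memory_failure_dev_pagemap() helper to be called directly by moving
the put_dev_pagemap() for the reference taken in memory_failure() out of
the helper and into that caller.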

Cc: Naoya Horiguchi <naoya.horigu...@nec.com>
Cc: Andrew Morton <a...@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
---
 mm/memory-failure.c |   25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 24210c9bd843..43ba4307c526 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -395,8 +395,9 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
                         * signal and then access the memory. Just kill it.
                         */
                        if (fail || tk->addr == -EFAULT) {
-                               pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
-                                      pfn, tk->tsk->comm, tk->tsk->pid);
+                               pr_err_ratelimited(
+                                       "Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+                                       pfn, tk->tsk->comm, tk->tsk->pid);
                                do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
                                                 tk->tsk, PIDTYPE_PID);
                        }
@@ -408,8 +409,9 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
                         * process anyways.
                         */
                        else if (kill_proc(tk, pfn, flags) < 0)
-                               pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
-                                      pfn, tk->tsk->comm, tk->tsk->pid);
+                               pr_err_ratelimited(
+                                       "Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
+                                       pfn, tk->tsk->comm, tk->tsk->pid);
                }
                put_task_struct(tk->tsk);
                kfree(tk);
@@ -919,8 +921,8 @@ static void action_result(unsigned long pfn, enum mf_action_page_type type,
 {
        trace_memory_failure_event(pfn, type, result);
 
-       pr_err("Memory failure: %#lx: recovery action for %s: %s\n",
-               pfn, action_page_types[type], action_name[result]);
+       pr_err_ratelimited("Memory failure: %#lx: recovery action for %s: %s\n",
+                          pfn, action_page_types[type], action_name[result]);
 }
 
 static int page_action(struct page_state *ps, struct page *p,
@@ -1375,8 +1377,6 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 unlock:
        dax_unlock_page(page, cookie);
 out:
-       /* drop pgmap ref acquired in caller */
-       put_dev_pagemap(pgmap);
        action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
        return rc;
 }
@@ -1415,9 +1415,12 @@ int memory_failure(unsigned long pfn, int flags)
        if (!p) {
                if (pfn_valid(pfn)) {
                        pgmap = get_dev_pagemap(pfn, NULL);
-                       if (pgmap)
-                               return memory_failure_dev_pagemap(pfn, flags,
-                                                                 pgmap);
+                       if (pgmap) {
+                               res = memory_failure_dev_pagemap(pfn, flags,
+                                                                pgmap);
+                               put_dev_pagemap(pgmap);
+                               return res;
+                       }
                }
                pr_err("Memory failure: %#lx: memory outside kernel control\n",
                        pfn);

Reply via email to