Add a void memory_failure notification callback to struct
node_private_ops so services managing N_MEMORY_PRIVATE nodes are
notified when a page on their node experiences a hardware error.

The callback is notification only -- the kernel always proceeds with
standard hwpoison handling for online pages.

The notification hook fires after TestSetPageHWPoison succeeds and
before get_hwpoison_page, giving the service a chance to clean up.

Signed-off-by: Gregory Price <[email protected]>
---
 include/linux/node_private.h |  6 ++++++
 mm/internal.h                | 16 ++++++++++++++++
 mm/memory-failure.c          | 15 +++++++++++++++
 3 files changed, 37 insertions(+)

diff --git a/include/linux/node_private.h b/include/linux/node_private.h
index 7a7438fb9eda..d2669f68ac20 100644
--- a/include/linux/node_private.h
+++ b/include/linux/node_private.h
@@ -113,6 +113,10 @@ struct node_reclaim_policy {
  *   watermark_boost lifecycle (kswapd will not clear it).
  *   If NULL, normal boost policy applies.
  *
+ * @memory_failure: Notification of hardware error on a page on this node.
+ *   [folio-referenced callback]
+ *   Notification only; the kernel always handles the failure.
+ *
  * @flags: Operation exclusion flags (NP_OPS_* constants).
  *
  */
@@ -127,6 +131,8 @@ struct node_private_ops {
        vm_fault_t (*handle_fault)(struct folio *folio, struct vm_fault *vmf,
                                   enum pgtable_level level);
        void (*reclaim_policy)(int nid, struct node_reclaim_policy *policy);
+       void (*memory_failure)(struct folio *folio, unsigned long pfn,
+                              int mf_flags);
        unsigned long flags;
 };
 
diff --git a/mm/internal.h b/mm/internal.h
index db32cb2d7a29..64467ca774f1 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1608,6 +1608,22 @@ static inline void node_private_reclaim_policy(int nid,
 }
 #endif
 
+static inline void folio_managed_memory_failure(struct folio *folio,
+                                               unsigned long pfn,
+                                               int mf_flags)
+{
+       /* Zone-device pages handle memory failure via dev_pagemap_ops. */
+       if (folio_is_zone_device(folio))
+               return;
+       if (folio_is_private_node(folio)) {
+               const struct node_private_ops *ops =
+                       folio_node_private_ops(folio);
+               /* Notification only; the caller still handles the failure. */
+               if (ops && ops->memory_failure)
+                       ops->memory_failure(folio, pfn, mf_flags);
+       }
+}
+
 struct vm_struct *__get_vm_area_node(unsigned long size,
                                     unsigned long align, unsigned long shift,
                                     unsigned long vm_flags, unsigned long 
start,
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c80c2907da33..79c91d44ec1e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2379,6 +2379,15 @@ int memory_failure(unsigned long pfn, int flags)
                goto unlock_mutex;
        }
 
+       /*
+        * Notify private-node services about the hardware error so they
+        * can update internal tracking (e.g., CXL poison lists, stop
+        * demoting to failing DIMMs).  This is notification only -- the
+        * kernel proceeds with standard hwpoison handling regardless.
+        */
+       if (unlikely(page_is_private_managed(p)))
+               folio_managed_memory_failure(page_folio(p), pfn, flags);
+
        /*
         * We need/can do nothing about count=0 pages.
         * 1) it's a free page, and therefore in safe hand:
@@ -2825,6 +2834,12 @@ static int soft_offline_in_use_page(struct page *page)
                return 0;
        }
 
+       if (!folio_managed_allows_migrate(folio)) {
+               pr_info("%#lx: cannot migrate private node folio\n", pfn);
+               folio_put(folio);
+               return -EBUSY;
+       }
+
        isolated = isolate_folio_to_list(folio, &pagelist);
 
        /*
-- 
2.53.0


Reply via email to