Add a void memory_failure notification callback to struct node_private_ops so services managing N_MEMORY_PRIVATE nodes are notified when a page on their node experiences a hardware error.
The callback is notification only -- the kernel always proceeds with standard hwpoison handling for online pages. The notification hook fires after TestSetPageHWPoison succeeds and before get_hwpoison_page giving the service a chance to clean up. Signed-off-by: Gregory Price <[email protected]> --- include/linux/node_private.h | 6 ++++++ mm/internal.h | 16 ++++++++++++++++ mm/memory-failure.c | 15 +++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/include/linux/node_private.h b/include/linux/node_private.h index 7a7438fb9eda..d2669f68ac20 100644 --- a/include/linux/node_private.h +++ b/include/linux/node_private.h @@ -113,6 +113,10 @@ struct node_reclaim_policy { * watermark_boost lifecycle (kswapd will not clear it). * If NULL, normal boost policy applies. * + * @memory_failure: Notification of hardware error on a page on this node. + * [folio-referenced callback] + * Notification only, kernel always handles the failure. + * * @flags: Operation exclusion flags (NP_OPS_* constants). 
* */ @@ -127,6 +131,8 @@ struct node_private_ops { vm_fault_t (*handle_fault)(struct folio *folio, struct vm_fault *vmf, enum pgtable_level level); void (*reclaim_policy)(int nid, struct node_reclaim_policy *policy); + void (*memory_failure)(struct folio *folio, unsigned long pfn, + int mf_flags); unsigned long flags; }; diff --git a/mm/internal.h b/mm/internal.h index db32cb2d7a29..64467ca774f1 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1608,6 +1608,22 @@ static inline void node_private_reclaim_policy(int nid, } #endif +static inline void folio_managed_memory_failure(struct folio *folio, + unsigned long pfn, + int mf_flags) +{ + /* Zone device pages handle memory failure via dev_pagemap_ops */ + if (folio_is_zone_device(folio)) + return; + if (folio_is_private_node(folio)) { + const struct node_private_ops *ops = + folio_node_private_ops(folio); + + if (ops && ops->memory_failure) + ops->memory_failure(folio, pfn, mf_flags); + } +} + struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long shift, unsigned long vm_flags, unsigned long start, diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c80c2907da33..79c91d44ec1e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2379,6 +2379,15 @@ int memory_failure(unsigned long pfn, int flags) goto unlock_mutex; } + /* + * Notify private-node services about the hardware error so they + * can update internal tracking (e.g., CXL poison lists, stop + * demoting to failing DIMMs). This is notification only -- the + * kernel proceeds with standard hwpoison handling regardless. + */ + if (unlikely(page_is_private_managed(p))) + folio_managed_memory_failure(page_folio(p), pfn, flags); + /* * We need/can do nothing about count=0 pages. 
* 1) it's a free page, and therefore in safe hand: @@ -2825,6 +2834,12 @@ static int soft_offline_in_use_page(struct page *page) return 0; } + if (!folio_managed_allows_migrate(folio)) { + pr_info("%#lx: cannot migrate private node folio\n", pfn); + folio_put(folio); + return -EBUSY; + } + isolated = isolate_folio_to_list(folio, &pagelist); /* -- 2.53.0
