Make __get_user_pages return -EHWPOISON for HWPOISON page only if
FOLL_HWPOISON is specified.  With this patch, the interested callers
can distinguish HWPOISON page from general FAULT page, while other
callers will still get -EFAULT for pages, so the user space interface
need not to be changed.

get_user_pages_hwpoison is added as a variant of get_user_pages that
can return -EHWPOISON for HWPOISON page.

This feature is needed by KVM, where UCR MCE should be relayed to
guest for HWPOISON page, while instruction emulation and MMIO will be
tried for general FAULT page.

The idea comes from Andrew Morton.

Signed-off-by: Huang Ying <ying.hu...@intel.com>
Cc: Andrew Morton <a...@linux-foundation.org>
---
 include/asm-generic/errno.h |    2 +
 include/linux/mm.h          |   17 +++++++++++++
 mm/memory.c                 |   55 +++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 71 insertions(+), 3 deletions(-)

--- a/include/asm-generic/errno.h
+++ b/include/asm-generic/errno.h
@@ -108,4 +108,6 @@
 
 #define ERFKILL                132     /* Operation not possible due to 
RF-kill */
 
+#define EHWPOISON      133     /* Memory page has hardware error */
+
 #endif
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -860,6 +860,22 @@ int get_user_pages(struct task_struct *t
                        struct page **pages, struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        struct page **pages);
+#ifdef CONFIG_MEMORY_FAILURE
+int get_user_pages_hwpoison(struct task_struct *tsk, struct mm_struct *mm,
+                           unsigned long start, int nr_pages, int write,
+                           int force, struct page **pages,
+                           struct vm_area_struct **vmas);
+#else
+static inline int get_user_pages_hwpoison(struct task_struct *tsk,
+                                         struct mm_struct *mm,
+                                         unsigned long start, int nr_pages,
+                                         int write, int force,
+                                         struct page **pages,
+                                         struct vm_area_struct **vmas) {
+       return get_user_pages(tsk, mm, start, nr_pages,
+                             write, force, pages, vmas);
+}
+#endif
 struct page *get_dump_page(unsigned long addr);
 
 extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
@@ -1415,6 +1431,7 @@ struct page *follow_page(struct vm_area_
 #define FOLL_GET       0x04    /* do get_page on page */
 #define FOLL_DUMP      0x08    /* give error on hole if it would be zero */
 #define FOLL_FORCE     0x10    /* get_user_pages read/write w/o permission */
+#define FOLL_HWPOISON  0x20    /* check page is hwpoisoned */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
                        void *data);
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1449,9 +1449,16 @@ int __get_user_pages(struct task_struct
                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM)
                                                return i ? i : -ENOMEM;
-                                       if (ret &
-                                           
(VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE|
-                                            VM_FAULT_SIGBUS))
+                                       if (ret & (VM_FAULT_HWPOISON |
+                                                  VM_FAULT_HWPOISON_LARGE)) {
+                                               if (i)
+                                                       return i;
+                                               else if (gup_flags & 
FOLL_HWPOISON)
+                                                       return -EHWPOISON;
+                                               else
+                                                       return -EFAULT;
+                                       }
+                                       if (ret & VM_FAULT_SIGBUS)
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
@@ -1563,6 +1570,48 @@ int get_user_pages(struct task_struct *t
 }
 EXPORT_SYMBOL(get_user_pages);
 
+#ifdef CONFIG_MEMORY_FAILURE
+/**
+ * get_user_pages_hwpoison() - pin user pages in memory, return hwpoison status
+ * @tsk:       task_struct of target task
+ * @mm:                mm_struct of target mm
+ * @start:     starting user address
+ * @nr_pages:  number of pages from start to pin
+ * @write:     whether pages will be written to by the caller
+ * @force:     whether to force write access even if user mapping is
+ *             readonly. This will result in the page being COWed even
+ *             in MAP_SHARED mappings. You do not want this.
+ * @pages:     array that receives pointers to the pages pinned.
+ *             Should be at least nr_pages long. Or NULL, if caller
+ *             only intends to ensure the pages are faulted in.
+ * @vmas:      array of pointers to vmas corresponding to each page.
+ *             Or NULL if the caller does not require them.
+ *
+ * Returns number of pages pinned.
+ *
+ * If the page table or memory page is hwpoisoned, return -EHWPOISON.
+ *
+ * Otherwise, same as get_user_pages.
+ */
+int get_user_pages_hwpoison(struct task_struct *tsk, struct mm_struct *mm,
+                           unsigned long start, int nr_pages, int write,
+                           int force, struct page **pages,
+                           struct vm_area_struct **vmas)
+{
+       int flags = FOLL_TOUCH | FOLL_HWPOISON;
+
+       if (pages)
+               flags |= FOLL_GET;
+       if (write)
+               flags |= FOLL_WRITE;
+       if (force)
+               flags |= FOLL_FORCE;
+
+       return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
+}
+EXPORT_SYMBOL(get_user_pages_hwpoison);
+#endif
+
 /**
  * get_dump_page() - pin user page in memory while writing it to core dump
  * @addr: user address


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to