From: Andrea Arcangeli <aarca...@redhat.com>

Accurate userfaultfd WP tracking is possible by tracking exactly which
virtual memory ranges were writeprotected by userland.  We can't rely
only on the RW bit of the mapped pagetable because that information is
destroyed by fork() or KSM or swap.  If we were to rely on that, we'd
need to stay on the safe side and generate false positive wp faults for
every swapped out page.

[pet...@redhat.com: append _PAGE_UFFD_WP to _PAGE_CHG_MASK]
Signed-off-by: Andrea Arcangeli <aarca...@redhat.com>
Signed-off-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Andrew Morton <a...@linux-foundation.org>
Reviewed-by: Jerome Glisse <jgli...@redhat.com>
Reviewed-by: Mike Rapoport <r...@linux.vnet.ibm.com>
Cc: Bobby Powers <bobbypow...@gmail.com>
Cc: Brian Geffon <bgef...@google.com>
Cc: David Hildenbrand <da...@redhat.com>
Cc: Denis Plotnikov <dplotni...@virtuozzo.com>
Cc: "Dr . David Alan Gilbert" <dgilb...@redhat.com>
Cc: Hugh Dickins <hu...@google.com>
Cc: Johannes Weiner <han...@cmpxchg.org>
Cc: "Kirill A . Shutemov" <kir...@shutemov.name>
Cc: Martin Cracauer <craca...@cons.org>
Cc: Marty McFadden <mcfadd...@llnl.gov>
Cc: Maya Gokhale <gokha...@llnl.gov>
Cc: Mel Gorman <mgor...@suse.de>
Cc: Mike Kravetz <mike.krav...@oracle.com>
Cc: Pavel Emelyanov <xe...@openvz.org>
Cc: Rik van Riel <r...@redhat.com>
Cc: Shaohua Li <s...@fb.com>
Link: http://lkml.kernel.org/r/20200220163112.11409-4-pet...@redhat.com
Signed-off-by: Linus Torvalds <torva...@linux-foundation.org>

https://jira.sw.ru/browse/PSBM-102938
(cherry picked from commit 5a281062af1d43d3f3956a6b429c2d727bc92603)
Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 arch/x86/Kconfig                     |  1 +
 arch/x86/include/asm/pgtable.h       | 52 ++++++++++++++++++++++++++++
 arch/x86/include/asm/pgtable_64.h    |  8 ++++-
 arch/x86/include/asm/pgtable_types.h | 11 +++++-
 include/asm-generic/pgtable.h        |  1 +
 include/asm-generic/pgtable_uffd.h   | 51 +++++++++++++++++++++++++++
 init/Kconfig                         |  5 +++
 7 files changed, 127 insertions(+), 2 deletions(-)
 create mode 100644 include/asm-generic/pgtable_uffd.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c3523eac2cb6..961d472d1e28 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -128,6 +128,7 @@ config X86
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
+       select HAVE_ARCH_USERFAULTFD_WP         if USERFAULTFD
        select HAVE_ARCH_VMAP_STACK             if X86_64
        select HAVE_ARCH_WITHIN_STACK_FRAMES
        select HAVE_CMPXCHG_DOUBLE
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 13125aad804c..f01d8e82167b 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -23,6 +23,7 @@
 
 #ifndef __ASSEMBLY__
 #include <asm/x86_init.h>
+#include <asm-generic/pgtable_uffd.h>
 
 extern pgd_t early_top_pgt[PTRS_PER_PGD];
 int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
@@ -291,6 +292,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
        return native_make_pte(v & ~clear);
 }
 
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline int pte_uffd_wp(pte_t pte)
+{
+       return pte_flags(pte) & _PAGE_UFFD_WP;
+}
+
+static inline pte_t pte_mkuffd_wp(pte_t pte)
+{
+       return pte_set_flags(pte, _PAGE_UFFD_WP);
+}
+
+static inline pte_t pte_clear_uffd_wp(pte_t pte)
+{
+       return pte_clear_flags(pte, _PAGE_UFFD_WP);
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
 static inline pte_t pte_mkclean(pte_t pte)
 {
        return pte_clear_flags(pte, _PAGE_DIRTY);
@@ -370,6 +388,23 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
        return native_make_pmd(v & ~clear);
 }
 
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline int pmd_uffd_wp(pmd_t pmd)
+{
+       return pmd_flags(pmd) & _PAGE_UFFD_WP;
+}
+
+static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
+{
+       return pmd_set_flags(pmd, _PAGE_UFFD_WP);
+}
+
+static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
+{
+       return pmd_clear_flags(pmd, _PAGE_UFFD_WP);
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
 static inline pmd_t pmd_mkold(pmd_t pmd)
 {
        return pmd_clear_flags(pmd, _PAGE_ACCESSED);
@@ -1262,6 +1297,23 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
 #endif
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+{
+       return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
+}
+
+static inline int pte_swp_uffd_wp(pte_t pte)
+{
+       return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
+}
+
+static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+{
+       return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
 #define PKRU_AD_BIT 0x1
 #define PKRU_WD_BIT 0x2
 #define PKRU_BITS_PER_PKEY 2
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 20127d551ab5..4bc241199087 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -274,7 +274,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
  * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
- * | TYPE (59-63) | ~OFFSET (9-58)  |0|0|X|X| X| X|X|SD|0| <- swp entry
+ * | TYPE (59-63) | ~OFFSET (9-58)  |0|0|X|X| X| X|F|SD|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
@@ -282,9 +282,15 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  * erratum where they can be incorrectly set by hardware on
  * non-present PTEs.
  *
+ * Bits 1-4 are not used in non-present format and available for
+ * special use described below:
+ *
  * SD (1) in swp entry is used to store soft dirty bit, which helps us
  * remember soft dirty over page migration
  *
+ * F (2) in swp entry is used to record when a pagetable is
+ * writeprotected by userfaultfd WP support.
+ *
  * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
  * but also L and G.
  *
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index a558381b016b..019a235e5238 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -32,6 +32,7 @@
 
 #define _PAGE_BIT_SPECIAL      _PAGE_BIT_SOFTW1
 #define _PAGE_BIT_CPA_TEST     _PAGE_BIT_SOFTW1
+#define _PAGE_BIT_UFFD_WP      _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */
 #define _PAGE_BIT_SOFT_DIRTY   _PAGE_BIT_SOFTW3 /* software dirty tracking */
 #define _PAGE_BIT_DEVMAP       _PAGE_BIT_SOFTW4
 
@@ -99,6 +100,14 @@
 #define _PAGE_SWP_SOFT_DIRTY   (_AT(pteval_t, 0))
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+#define _PAGE_UFFD_WP          (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP)
+#define _PAGE_SWP_UFFD_WP      _PAGE_USER
+#else
+#define _PAGE_UFFD_WP          (_AT(pteval_t, 0))
+#define _PAGE_SWP_UFFD_WP      (_AT(pteval_t, 0))
+#endif
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX       (_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #define _PAGE_DEVMAP   (_AT(u64, 1) << _PAGE_BIT_DEVMAP)
@@ -123,7 +132,7 @@
  */
 #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |         \
                         _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
-                        _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
+                        _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_UFFD_WP)
 #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
 
 /*
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 592b4c0f02c7..631f099e65c1 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -10,6 +10,7 @@
 #include <linux/mm_types.h>
 #include <linux/bug.h>
 #include <linux/errno.h>
+#include <asm-generic/pgtable_uffd.h>
 
 #if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
        defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
diff --git a/include/asm-generic/pgtable_uffd.h b/include/asm-generic/pgtable_uffd.h
new file mode 100644
index 000000000000..643d1bf559c2
--- /dev/null
+++ b/include/asm-generic/pgtable_uffd.h
@@ -0,0 +1,51 @@
+#ifndef _ASM_GENERIC_PGTABLE_UFFD_H
+#define _ASM_GENERIC_PGTABLE_UFFD_H
+
+#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static __always_inline int pte_uffd_wp(pte_t pte)
+{
+       return 0;
+}
+
+static __always_inline int pmd_uffd_wp(pmd_t pmd)
+{
+       return 0;
+}
+
+static __always_inline pte_t pte_mkuffd_wp(pte_t pte)
+{
+       return pte;
+}
+
+static __always_inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
+{
+       return pmd;
+}
+
+static __always_inline pte_t pte_clear_uffd_wp(pte_t pte)
+{
+       return pte;
+}
+
+static __always_inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
+{
+       return pmd;
+}
+
+static __always_inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+{
+       return pte;
+}
+
+static __always_inline int pte_swp_uffd_wp(pte_t pte)
+{
+       return 0;
+}
+
+static __always_inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+{
+       return pte;
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+#endif /* _ASM_GENERIC_PGTABLE_UFFD_H */
diff --git a/init/Kconfig b/init/Kconfig
index 9560a06d735a..25eae91c2bea 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1357,6 +1357,11 @@ config ADVISE_SYSCALLS
          applications use these syscalls, you can disable this option to save
          space.
 
+config HAVE_ARCH_USERFAULTFD_WP
+       bool
+       help
+         Arch has userfaultfd write protection support
+
 config MEMBARRIER
        bool "Enable membarrier() system call" if EXPERT
        default y
-- 
2.25.3

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to