Re: [RFC PATCH v9 04/13] xpfo, x86: Add support for XPFO for x86-64

2019-04-04 Thread Khalid Aziz
[trimmed To: and Cc: lists; they were too large]

On 4/4/19 1:52 AM, Peter Zijlstra wrote:
> On Wed, Apr 03, 2019 at 11:34:05AM -0600, Khalid Aziz wrote:
>> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
>> index 2779ace16d23..5c0e1581fa56 100644
>> --- a/arch/x86/include/asm/pgtable.h
>> +++ b/arch/x86/include/asm/pgtable.h
>> @@ -1437,6 +1437,32 @@ static inline bool arch_has_pfn_modify_check(void)
>>  return boot_cpu_has_bug(X86_BUG_L1TF);
>>  }
>>  
>> +/*
>> + * The current flushing context - we pass it instead of 5 arguments:
>> + */
>> +struct cpa_data {
>> +        unsigned long   *vaddr;
>> +        pgd_t           *pgd;
>> +        pgprot_t        mask_set;
>> +        pgprot_t        mask_clr;
>> +        unsigned long   numpages;
>> +        unsigned long   curpage;
>> +        unsigned long   pfn;
>> +        unsigned int    flags;
>> +        unsigned int    force_split : 1,
>> +                        force_static_prot : 1;
>> +        struct page     **pages;
>> +};
>> +
>> +
>> +int
>> +should_split_large_page(pte_t *kpte, unsigned long address,
>> +                        struct cpa_data *cpa);
>> +extern spinlock_t cpa_lock;
>> +int
>> +__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
>> +                   struct page *base);
>> +
> 
> I really hate exposing all that.

I believe this was done so set_kpte() could split large pages if needed.
I will look into creating a helper function instead so this does not
have to be exposed.
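
Something along these lines, perhaps. This is only a sketch: the name
split_large_kernel_page() is made up, and the helper would live in
arch/x86/mm/pageattr.c so that struct cpa_data, cpa_lock and
__split_large_page() all stay private to that file:

int split_large_kernel_page(pte_t *kpte, unsigned long address,
			    pgprot_t prot, pgprot_t msk_clr)
{
	/* Fields not listed here are zero, as in the open-coded version */
	struct cpa_data cpa = {
		.vaddr		= &address,
		.mask_set	= prot,
		.mask_clr	= msk_clr,
		.numpages	= 1,
	};
	struct page *base;
	int ret;

	if (!should_split_large_page(kpte, address, &cpa))
		return 0;

	/* GFP_ATOMIC because set_kpte() may be called from atomic context */
	base = alloc_pages(GFP_ATOMIC, 0);
	if (!base)
		return -ENOMEM;

	if (!debug_pagealloc_enabled())
		spin_lock(&cpa_lock);
	ret = __split_large_page(&cpa, kpte, address, base);
	if (ret < 0)
		__free_page(base);
	if (!debug_pagealloc_enabled())
		spin_unlock(&cpa_lock);

	return ret;
}

set_kpte() would then need only lookup_address() plus this one call for
the 2M/1G levels, and the declarations above could come out of pgtable.h
entirely.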

> 
>>  #include <asm-generic/pgtable.h>
>>  #endif  /* __ASSEMBLY__ */
>>  
> 
>> diff --git a/arch/x86/mm/xpfo.c b/arch/x86/mm/xpfo.c
>> new file mode 100644
>> index 000000000000..3045bb7e4659
>> --- /dev/null
>> +++ b/arch/x86/mm/xpfo.c
>> @@ -0,0 +1,123 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
>> + * Copyright (C) 2016 Brown University. All rights reserved.
>> + *
>> + * Authors:
>> + *   Juerg Haefliger 
>> + *   Vasileios P. Kemerlis 
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published by
>> + * the Free Software Foundation.
>> + */
>> +
>> +#include <linux/mm.h>
>> +
>> +#include <asm/tlbflush.h>
>> +
>> +extern spinlock_t cpa_lock;
>> +
>> +/* Update a single kernel page table entry */
>> +inline void set_kpte(void *kaddr, struct page *page, pgprot_t prot)
>> +{
>> +unsigned int level;
>> +pgprot_t msk_clr;
>> +pte_t *pte = lookup_address((unsigned long)kaddr, &level);
>> +
>> +if (unlikely(!pte)) {
>> +WARN(1, "xpfo: invalid address %p\n", kaddr);
>> +return;
>> +}
>> +
>> +switch (level) {
>> +case PG_LEVEL_4K:
>> +set_pte_atomic(pte, pfn_pte(page_to_pfn(page),
>> +   canon_pgprot(prot)));
> 
> (sorry, do we also need a nikon_pgprot() ? :-)

Are we trying to encourage nikon as well to sponsor this patch :)

> 
>> +break;
>> +case PG_LEVEL_2M:
>> +case PG_LEVEL_1G: {
>> +struct cpa_data cpa = { };
>> +int do_split;
>> +
>> +if (level == PG_LEVEL_2M)
>> +msk_clr = pmd_pgprot(*(pmd_t *)pte);
>> +else
>> +msk_clr = pud_pgprot(*(pud_t *)pte);
>> +
>> +cpa.vaddr = kaddr;
>> +cpa.pages = &page;
>> +cpa.mask_set = prot;
>> +cpa.mask_clr = msk_clr;
>> +cpa.numpages = 1;
>> +cpa.flags = 0;
>> +cpa.curpage = 0;
>> +cpa.force_split = 0;
>> +
>> +
>> +do_split = should_split_large_page(pte, (unsigned long)kaddr,
>> +   &cpa);
>> +if (do_split) {
>> +struct page *base;
>> +
>> +base = alloc_pages(GFP_ATOMIC, 0);
>> +if (!base) {
>> +WARN(1, "xpfo: failed to split large page\n");
> 
> You have to be fcking kidding right? A WARN when a GFP_ATOMIC allocation
> fails?!
> 

Not sure what the reasoning was for this WARN in the original patch, but I
think it is trying to warn about the failure to split the large page (and
hence to unmap this one page), not about the allocation failure as such.
Nevertheless, this could be done better.
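
For instance (purely a sketch, and it assumes set_kpte() grows an int
return that callers would then have to check), the 2M/1G branch could
propagate -ENOMEM quietly and keep a warning only for the split itself
failing:

	base = alloc_pages(GFP_ATOMIC, 0);
	if (!base)
		return -ENOMEM;	/* expected under memory pressure, no WARN */

	if (!debug_pagealloc_enabled())
		spin_lock(&cpa_lock);
	ret = __split_large_page(&cpa, pte, (unsigned long)kaddr, base);
	if (!debug_pagealloc_enabled())
		spin_unlock(&cpa_lock);
	if (ret < 0) {
		__free_page(base);
		WARN_ONCE(1, "xpfo: failed to split large page\n");
	}
	return ret;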

>> +break;
>> +}
>> +
>> +if (!debug_pagealloc_enabled())
>> +spin_lock(&cpa_lock);
>> +if  (__split_large_page(&cpa, pte, (unsigned long)kaddr,
>> +base) < 0) {
>> +__free_page(base);
>> +WARN(1, "xpfo: failed to split large page\n");
>> +}
>> +if (!debug_pagealloc_enabled())
>> +spin_unlock(&cpa_lock);
>> +}
>> +
>> +break;
> 
> Ever heard of helper functions?

Re: [RFC PATCH v9 04/13] xpfo, x86: Add support for XPFO for x86-64

2019-04-04 Thread Peter Zijlstra
On Wed, Apr 03, 2019 at 11:34:05AM -0600, Khalid Aziz wrote:
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> index 2779ace16d23..5c0e1581fa56 100644
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -1437,6 +1437,32 @@ static inline bool arch_has_pfn_modify_check(void)
>   return boot_cpu_has_bug(X86_BUG_L1TF);
>  }
>  
> +/*
> + * The current flushing context - we pass it instead of 5 arguments:
> + */
> +struct cpa_data {
> +        unsigned long   *vaddr;
> +        pgd_t           *pgd;
> +        pgprot_t        mask_set;
> +        pgprot_t        mask_clr;
> +        unsigned long   numpages;
> +        unsigned long   curpage;
> +        unsigned long   pfn;
> +        unsigned int    flags;
> +        unsigned int    force_split : 1,
> +                        force_static_prot : 1;
> +        struct page     **pages;
> +};
> +
> +
> +int
> +should_split_large_page(pte_t *kpte, unsigned long address,
> +                        struct cpa_data *cpa);
> +extern spinlock_t cpa_lock;
> +int
> +__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
> +                   struct page *base);
> +

I really hate exposing all that.

>  #include <asm-generic/pgtable.h>
>  #endif   /* __ASSEMBLY__ */
>  

> diff --git a/arch/x86/mm/xpfo.c b/arch/x86/mm/xpfo.c
> new file mode 100644
> index 000000000000..3045bb7e4659
> --- /dev/null
> +++ b/arch/x86/mm/xpfo.c
> @@ -0,0 +1,123 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
> + * Copyright (C) 2016 Brown University. All rights reserved.
> + *
> + * Authors:
> + *   Juerg Haefliger 
> + *   Vasileios P. Kemerlis 
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + */
> +
> +#include <linux/mm.h>
> +
> +#include <asm/tlbflush.h>
> +
> +extern spinlock_t cpa_lock;
> +
> +/* Update a single kernel page table entry */
> +inline void set_kpte(void *kaddr, struct page *page, pgprot_t prot)
> +{
> + unsigned int level;
> + pgprot_t msk_clr;
> + pte_t *pte = lookup_address((unsigned long)kaddr, &level);
> +
> + if (unlikely(!pte)) {
> + WARN(1, "xpfo: invalid address %p\n", kaddr);
> + return;
> + }
> +
> + switch (level) {
> + case PG_LEVEL_4K:
> + set_pte_atomic(pte, pfn_pte(page_to_pfn(page),
> +canon_pgprot(prot)));

(sorry, do we also need a nikon_pgprot() ? :-)

> + break;
> + case PG_LEVEL_2M:
> + case PG_LEVEL_1G: {
> + struct cpa_data cpa = { };
> + int do_split;
> +
> + if (level == PG_LEVEL_2M)
> + msk_clr = pmd_pgprot(*(pmd_t *)pte);
> + else
> + msk_clr = pud_pgprot(*(pud_t *)pte);
> +
> + cpa.vaddr = kaddr;
> + cpa.pages = &page;
> + cpa.mask_set = prot;
> + cpa.mask_clr = msk_clr;
> + cpa.numpages = 1;
> + cpa.flags = 0;
> + cpa.curpage = 0;
> + cpa.force_split = 0;
> +
> +
> + do_split = should_split_large_page(pte, (unsigned long)kaddr,
> +&cpa);
> + if (do_split) {
> + struct page *base;
> +
> + base = alloc_pages(GFP_ATOMIC, 0);
> + if (!base) {
> + WARN(1, "xpfo: failed to split large page\n");

You have to be fcking kidding right? A WARN when a GFP_ATOMIC allocation
fails?!

> + break;
> + }
> +
> + if (!debug_pagealloc_enabled())
> + spin_lock(&cpa_lock);
> + if  (__split_large_page(&cpa, pte, (unsigned long)kaddr,
> + base) < 0) {
> + __free_page(base);
> + WARN(1, "xpfo: failed to split large page\n");
> + }
> + if (!debug_pagealloc_enabled())
> + spin_unlock(&cpa_lock);
> + }
> +
> + break;

Ever heard of helper functions?

> + }
> + case PG_LEVEL_512G:
> + /* fallthrough, splitting infrastructure doesn't
> +  * support 512G pages.
> +  */

Broken comment style.
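
For reference, kernel style wants the opening /* of a multi-line comment
on a line of its own, i.e. something like:

	case PG_LEVEL_512G:
		/*
		 * Fall through: the splitting infrastructure doesn't
		 * support 512G pages.
		 */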

> + default:
> + WARN(1, "xpfo: unsupported page level %x\n", level);
> + }
> +
> +}
> +EXPORT_SYMBOL_GPL(set_kpte);
> +
> +inline void xpfo_flush_kernel_tlb(struct page *page, int order)
> +{
> + int level;
> + unsigned long size, kaddr;
> +
> + kaddr = (unsigned long)page_address(page);
> +
> + if (unlikely(!lookup_address(kaddr, &level))) {
> + WARN(1, "xpfo: invalid address to flush %lx %d\n", kaddr, order);

[RFC PATCH v9 04/13] xpfo, x86: Add support for XPFO for x86-64

2019-04-03 Thread Khalid Aziz
From: Juerg Haefliger 

This patch adds support for XPFO for x86-64. It uses the generic
infrastructure in place for XPFO and adds the architecture specific
bits to enable XPFO on x86-64.
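
Judging by the diff, the arch-specific surface is just two hooks,
implemented in the new arch/x86/mm/xpfo.c (and presumably declared by the
two lines added to include/linux/xpfo.h):

	void set_kpte(void *kaddr, struct page *page, pgprot_t prot);
	void xpfo_flush_kernel_tlb(struct page *page, int order);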

CC: x...@kernel.org
Suggested-by: Vasileios P. Kemerlis 
Signed-off-by: Juerg Haefliger 
Signed-off-by: Tycho Andersen 
Signed-off-by: Marco Benatto 
Signed-off-by: Julian Stecklina 
Signed-off-by: Khalid Aziz 
Cc: Khalid Aziz 
---
 .../admin-guide/kernel-parameters.txt |  10 +-
 arch/x86/Kconfig                      |   1 +
 arch/x86/include/asm/pgtable.h        |  26 +
 arch/x86/mm/Makefile                  |   2 +
 arch/x86/mm/pageattr.c                |  23 +---
 arch/x86/mm/xpfo.c                    | 123 ++
 include/linux/xpfo.h                  |   2 +
 7 files changed, 162 insertions(+), 25 deletions(-)
 create mode 100644 arch/x86/mm/xpfo.c

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9b36da94760e..e65e3bc1efe0 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2997,11 +2997,11 @@
 
	nox2apic	[X86-64,APIC] Do not enable x2APIC mode.
 
-   noxpfo  [XPFO] Disable eXclusive Page Frame Ownership (XPFO)
-   when CONFIG_XPFO is on. Physical pages mapped into
-   user applications will also be mapped in the
-   kernel's address space as if CONFIG_XPFO was not
-   enabled.
+   noxpfo  [XPFO,X86-64] Disable eXclusive Page Frame
+   Ownership (XPFO) when CONFIG_XPFO is on. Physical
+   pages mapped into user applications will also be
+   mapped in the kernel's address space as if
+   CONFIG_XPFO was not enabled.
 
	cpu0_hotplug	[X86] Turn on CPU0 hotplug feature when
CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 68261430fe6e..122786604252 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -209,6 +209,7 @@ config X86
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
	select X86_FEATURE_NAMES	if PROC_FS
+   select ARCH_SUPPORTS_XPFO   if X86_64
 
 config INSTRUCTION_DECODER
def_bool y
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2779ace16d23..5c0e1581fa56 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1437,6 +1437,32 @@ static inline bool arch_has_pfn_modify_check(void)
return boot_cpu_has_bug(X86_BUG_L1TF);
 }
 
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+       unsigned long   *vaddr;
+       pgd_t           *pgd;
+       pgprot_t        mask_set;
+       pgprot_t        mask_clr;
+       unsigned long   numpages;
+       unsigned long   curpage;
+       unsigned long   pfn;
+       unsigned int    flags;
+       unsigned int    force_split : 1,
+                       force_static_prot : 1;
+       struct page     **pages;
+};
+
+
+int
+should_split_large_page(pte_t *kpte, unsigned long address,
+                        struct cpa_data *cpa);
+extern spinlock_t cpa_lock;
+int
+__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
+                  struct page *base);
+
 #include <asm-generic/pgtable.h>
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 4b101dd6e52f..93b0fdaf4a99 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -53,3 +53,5 @@ obj-$(CONFIG_PAGE_TABLE_ISOLATION)	+= pti.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt_identity.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt_boot.o
+
+obj-$(CONFIG_XPFO) += xpfo.o
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 14e6119838a6..530b5df0617e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -28,23 +28,6 @@
 
 #include "mm_internal.h"
 
-/*
- * The current flushing context - we pass it instead of 5 arguments:
- */
-struct cpa_data {
-       unsigned long   *vaddr;
-       pgd_t           *pgd;
-       pgprot_t        mask_set;
-       pgprot_t        mask_clr;
-       unsigned long   numpages;
-       unsigned long   curpage;
-       unsigned long   pfn;
-       unsigned int    flags;
-       unsigned int    force_split : 1,
-                       force_static_prot : 1;
-       struct page **pages;
-};
-
 enum cpa_warn {
CPA_CONFLICT,
CPA_PROTECT,
@@ -59,7 +42,7 @@ static const int cpa_warn_level = CPA_PROTECT;
  * entries change the page attribute in parallel to some other cpu
  * splitting a large page entry along with changing the attribute.
  */
-static DEFINE_SPINLOCK(cpa_lock);