[tip: x86/sgx] x86/sgx: Move provisioning device creation out of SGX driver

2021-04-07 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: b3754e5d3da320af2bebb7a690002685c7f5c15c
Gitweb:
https://git.kernel.org/tip/b3754e5d3da320af2bebb7a690002685c7f5c15c
Author: Sean Christopherson 
AuthorDate: Fri, 19 Mar 2021 20:23:09 +13:00
Committer: Borislav Petkov 
CommitterDate: Tue, 06 Apr 2021 19:18:46 +02:00

x86/sgx: Move provisioning device creation out of SGX driver

And extract sgx_set_attribute() out of sgx_ioc_enclave_provision() and
export it as a symbol for KVM to use.

The provisioning key is sensitive. The SGX driver only allows creating
an enclave that can access the provisioning key when the enclave
creator has permission to open /dev/sgx_provision. The same restriction
should apply to a VM as well: the provisioning key is platform-specific,
so an unrestricted VM could also potentially compromise it.

Move the provisioning device creation out of sgx_drv_init() and into
sgx_init() in preparation for adding SGX virtualization support. This
way, even if the SGX driver is not enabled because flexible Launch
Control is not available, SGX virtualization can still be enabled and
used to restrict a VM's ability to access the provisioning key.
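
Since the archived diff below is truncated before the main.c hunk that
adds the relocated helper, here is a sketch of what sgx_set_attribute()
looks like after the move, reconstructed from the logic removed from
sgx_ioc_enclave_provision(); illustrative, not the verbatim patch:

	/*
	 * Reconstruction of the relocated helper; mirrors the logic
	 * removed from sgx_ioc_enclave_provision() below.
	 */
	int sgx_set_attribute(unsigned long *allowed_attributes,
			      unsigned int attribute_fd)
	{
		struct file *file;

		file = fget(attribute_fd);
		if (!file)
			return -EINVAL;

		/* Only an fd backed by /dev/sgx_provision grants the attribute. */
		if (file->f_op != &sgx_provision_fops) {
			fput(file);
			return -EINVAL;
		}

		*allowed_attributes |= SGX_ATTR_PROVISIONKEY;

		fput(file);
		return 0;
	}
	EXPORT_SYMBOL_GPL(sgx_set_attribute);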

 [ bp: Massage commit message. ]

Signed-off-by: Sean Christopherson 
Signed-off-by: Kai Huang 
Signed-off-by: Borislav Petkov 
Reviewed-by: Jarkko Sakkinen 
Acked-by: Dave Hansen 
Link: 
https://lkml.kernel.org/r/0f4d044d621561f26d5f4ef73e8dc6cd18cc7e79.1616136308.git.kai.hu...@intel.com
---
 arch/x86/include/asm/sgx.h   |  3 ++-
 arch/x86/kernel/cpu/sgx/driver.c | 17 +-
 arch/x86/kernel/cpu/sgx/ioctl.c  | 16 +
 arch/x86/kernel/cpu/sgx/main.c   | 57 ++-
 4 files changed, 61 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 954042e..a16e2c9 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -372,4 +372,7 @@ int sgx_virt_einit(void __user *sigstruct, void __user *token,
 		   void __user *secs, u64 *lepubkeyhash, int *trapnr);
 #endif
 
+int sgx_set_attribute(unsigned long *allowed_attributes,
+		      unsigned int attribute_fd);
+
 #endif /* _ASM_X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 8ce6d83..aa9b8b8 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -136,10 +136,6 @@ static const struct file_operations sgx_encl_fops = {
.get_unmapped_area  = sgx_get_unmapped_area,
 };
 
-const struct file_operations sgx_provision_fops = {
-   .owner  = THIS_MODULE,
-};
-
 static struct miscdevice sgx_dev_enclave = {
.minor = MISC_DYNAMIC_MINOR,
.name = "sgx_enclave",
@@ -147,13 +143,6 @@ static struct miscdevice sgx_dev_enclave = {
	.fops = &sgx_encl_fops,
 };
 
-static struct miscdevice sgx_dev_provision = {
-   .minor = MISC_DYNAMIC_MINOR,
-   .name = "sgx_provision",
-   .nodename = "sgx_provision",
-	.fops = &sgx_provision_fops,
-};
-
 int __init sgx_drv_init(void)
 {
unsigned int eax, ebx, ecx, edx;
@@ -187,11 +176,5 @@ int __init sgx_drv_init(void)
if (ret)
return ret;
 
-	ret = misc_register(&sgx_dev_provision);
-	if (ret) {
-		misc_deregister(&sgx_dev_enclave);
-		return ret;
-	}
-
return 0;
 }
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 7be9c06..83df20e 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -2,6 +2,7 @@
 /*  Copyright(c) 2016-20 Intel Corporation. */
 
 #include <asm/mman.h>
+#include <asm/sgx.h>
 #include <linux/mman.h>
 #include <linux/delay.h>
 #include <linux/file.h>
@@ -666,24 +667,11 @@ out:
 static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg)
 {
struct sgx_enclave_provision params;
-   struct file *file;
 
	if (copy_from_user(&params, arg, sizeof(params)))
return -EFAULT;
 
-   file = fget(params.fd);
-   if (!file)
-   return -EINVAL;
-
-	if (file->f_op != &sgx_provision_fops) {
-   fput(file);
-   return -EINVAL;
-   }
-
-   encl->attributes_mask |= SGX_ATTR_PROVISIONKEY;
-
-   fput(file);
-   return 0;
+	return sgx_set_attribute(&encl->attributes_mask, params.fd);
 }
 
 long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 227f1e2..92cb11d 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -1,14 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
 /*  Copyright(c) 2016-20 Intel Corporation. */
 
+#include <linux/file.h>
 #include <linux/freezer.h>
 #include <linux/highmem.h>
 #include <linux/kthread.h>
+#include <linux/miscdevice.h>
 #include <linux/pagemap.h>
 #include <linux/ratelimit.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/slab.h>
+#include <asm/sgx.h>
 #include "driver.h"
 #include "encl.h"
 #include "encls.h"
@@ -743,6 +746,51 @@ void sgx_update_lepubkeyhash(u64 

[tip: x86/sgx] x86/cpufeatures: Add SGX1 and SGX2 sub-features

2021-04-07 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: b8921dccf3b25798409d35155b5d127085de72c2
Gitweb:
https://git.kernel.org/tip/b8921dccf3b25798409d35155b5d127085de72c2
Author: Sean Christopherson 
AuthorDate: Fri, 19 Mar 2021 20:22:18 +13:00
Committer: Borislav Petkov 
CommitterDate: Thu, 25 Mar 2021 17:33:11 +01:00

x86/cpufeatures: Add SGX1 and SGX2 sub-features

Add SGX1 and SGX2 feature flags, via CPUID.0x12.0x0.EAX, as scattered
features, since adding a new leaf for only two bits would be wasteful.
As part of virtualizing SGX, KVM will expose the SGX CPUID leafs to its
guest, and to do so correctly needs to query hardware and kernel support
for SGX1 and SGX2.

Suppress both SGX1 and SGX2 from /proc/cpuinfo. SGX1 basically means
SGX, and for SGX2 there is no concrete use case for exposing it in
/proc/cpuinfo.
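
Once the flags are populated, kernel code can gate SGX2-dependent paths
with the usual helpers; a hypothetical usage sketch, not part of this
patch:

	/* Hypothetical: enable an EDMM-dependent path only on SGX2 parts. */
	if (boot_cpu_has(X86_FEATURE_SGX1) && boot_cpu_has(X86_FEATURE_SGX2))
		pr_info("SGX2 (EDMM) supported\n");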

Signed-off-by: Sean Christopherson 
Signed-off-by: Kai Huang 
Signed-off-by: Borislav Petkov 
Acked-by: Dave Hansen 
Acked-by: Jarkko Sakkinen 
Link: 
https://lkml.kernel.org/r/d787827dbfca6b3210ac3e432e3ac1202727e786.1616136308.git.kai.hu...@intel.com
---
 arch/x86/include/asm/cpufeatures.h | 2 ++
 arch/x86/kernel/cpu/cpuid-deps.c   | 2 ++
 arch/x86/kernel/cpu/scattered.c| 2 ++
 3 files changed, 6 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index cc96e26..1f918f5 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -290,6 +290,8 @@
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT	(11*32+ 6) /* #AC for split lock */
 #define X86_FEATURE_PER_THREAD_MBA	(11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1		(11*32+ 8) /* "" Basic SGX */
+#define X86_FEATURE_SGX2		(11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
 
 /* Intel-defined CPU features, CPUID level 0x0007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI   (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index d40f8e0..defda61 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -73,6 +73,8 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_ENQCMD,   X86_FEATURE_XSAVES},
{ X86_FEATURE_PER_THREAD_MBA,   X86_FEATURE_MBA   },
{ X86_FEATURE_SGX_LC,   X86_FEATURE_SGX   },
+   { X86_FEATURE_SGX1, X86_FEATURE_SGX   },
+   { X86_FEATURE_SGX2, X86_FEATURE_SGX1  },
{}
 };
 
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 972ec3b..21d1f06 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -36,6 +36,8 @@ static const struct cpuid_bit cpuid_bits[] = {
	{ X86_FEATURE_CDP_L2,		CPUID_ECX,  2, 0x00000010, 2 },
	{ X86_FEATURE_MBA,		CPUID_EBX,  3, 0x00000010, 0 },
	{ X86_FEATURE_PER_THREAD_MBA,	CPUID_ECX,  0, 0x00000010, 3 },
+	{ X86_FEATURE_SGX1,		CPUID_EAX,  0, 0x00000012, 0 },
+	{ X86_FEATURE_SGX2,		CPUID_EAX,  1, 0x00000012, 0 },
	{ X86_FEATURE_HW_PSTATE,	CPUID_EDX,  7, 0x80000007, 0 },
	{ X86_FEATURE_CPB,		CPUID_EDX,  9, 0x80000007, 0 },
	{ X86_FEATURE_PROC_FEEDBACK,	CPUID_EDX, 11, 0x80000007, 0 },


[tip: x86/sgx] x86/sgx: Introduce virtual EPC for use by KVM guests

2021-04-07 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 540745ddbc70eabdc7dbd3fcc00fe4fb17cd59ba
Gitweb:
https://git.kernel.org/tip/540745ddbc70eabdc7dbd3fcc00fe4fb17cd59ba
Author: Sean Christopherson 
AuthorDate: Fri, 19 Mar 2021 20:22:21 +13:00
Committer: Borislav Petkov 
CommitterDate: Tue, 06 Apr 2021 09:43:17 +02:00

x86/sgx: Introduce virtual EPC for use by KVM guests

Add a misc device /dev/sgx_vepc to allow userspace to allocate "raw"
Enclave Page Cache (EPC) without an associated enclave. The intended
and only known use case for raw EPC allocation is to expose EPC to a
KVM guest, hence the 'vepc' moniker, virt.{c,h} files and X86_SGX_KVM
Kconfig.

The SGX driver uses the misc device /dev/sgx_enclave to support
userspace in creating an enclave. Each file descriptor returned from
opening /dev/sgx_enclave represents an enclave. Unlike the SGX driver,
KVM doesn't control how the guest uses the EPC, therefore EPC allocated
to a KVM guest is not associated with an enclave, and /dev/sgx_enclave
is not suitable for allocating EPC for a KVM guest.

Having separate device nodes for the SGX driver and KVM virtual EPC also
allows separate permission control for running host SGX enclaves and KVM
SGX guests.

To use /dev/sgx_vepc to allocate a virtual EPC instance of a particular
size, the hypervisor opens /dev/sgx_vepc and calls mmap() with the
intended size to get an address range of virtual EPC. It can then use
that address range to create a KVM memory slot serving as virtual EPC
for a guest.
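
A minimal userspace sketch of that flow, assuming only the mmap()-based
uAPI described above (illustrative; the exact usage is up to the
hypervisor):

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		size_t size = 16 * 1024 * 1024;	/* desired virtual EPC size */
		void *epc;
		int fd;

		fd = open("/dev/sgx_vepc", O_RDWR);
		if (fd < 0) {
			perror("open /dev/sgx_vepc");
			return EXIT_FAILURE;
		}

		/* The resulting mapping backs the guest's EPC. */
		epc = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
			   MAP_SHARED, fd, 0);
		if (epc == MAP_FAILED) {
			perror("mmap");
			close(fd);
			return EXIT_FAILURE;
		}

		/* epc would now be handed to KVM as a guest memory slot. */
		munmap(epc, size);
		close(fd);
		return 0;
	}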

Implement the "raw" EPC allocation in the x86 core-SGX subsystem via
/dev/sgx_vepc rather than in KVM. Doing so has two major advantages:

  - Does not require changes to KVM's uAPI, e.g. EPC gets handled as
just another memory backend for guests.

  - EPC management is wholly contained in the SGX subsystem, e.g. SGX
does not have to export any symbols, changes to reclaim flows don't
need to be routed through KVM, SGX's dirty laundry doesn't have to
get aired out for the world to see, and so on and so forth.

The virtual EPC pages allocated to guests are currently not reclaimable.
Reclaiming an EPC page used by an enclave requires a special reclaim
mechanism separate from normal page reclaim, and that mechanism is not
supported for virtual EPC pages. Due to the complications of handling
reclaim conflicts between guest and host, reclaiming virtual EPC pages
is significantly more complex than basic support for SGX virtualization.

 [ bp:
   - Massage commit message and comments
   - use cpu_feature_enabled()
   - vertically align struct members init
   - massage Virtual EPC clarification text
   - move Kconfig prompt to Virtualization ]

Signed-off-by: Sean Christopherson 
Co-developed-by: Kai Huang 
Signed-off-by: Kai Huang 
Signed-off-by: Borislav Petkov 
Acked-by: Dave Hansen 
Acked-by: Jarkko Sakkinen 
Link: 
https://lkml.kernel.org/r/0c38ced8c8e5a69872db4d6a1c0dabd01e07cad7.1616136308.git.kai.hu...@intel.com
---
 Documentation/x86/sgx.rst|  16 ++-
 arch/x86/kernel/cpu/sgx/Makefile |   1 +-
 arch/x86/kernel/cpu/sgx/sgx.h|   9 +-
 arch/x86/kernel/cpu/sgx/virt.c   | 259 ++-
 arch/x86/kvm/Kconfig |  12 +-
 5 files changed, 297 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/sgx/virt.c

diff --git a/Documentation/x86/sgx.rst b/Documentation/x86/sgx.rst
index f90076e..dd0ac96 100644
--- a/Documentation/x86/sgx.rst
+++ b/Documentation/x86/sgx.rst
@@ -234,3 +234,19 @@ As a result, when this happens, the user should stop running any new
 SGX workloads (or just any new workloads), and migrate all valuable
 workloads. Although a machine reboot can recover all EPC memory, the bug
 should be reported to Linux developers.
+
+
+Virtual EPC
+===========
+
+The implementation also has a virtual EPC driver to support SGX enclaves
+in guests. Unlike the SGX driver, an EPC page allocated by the virtual
+EPC driver doesn't have a specific enclave associated with it. This is
+because KVM doesn't track how a guest uses EPC pages.
+
+As a result, the SGX core page reclaimer doesn't support reclaiming EPC
+pages allocated to KVM guests through the virtual EPC driver. If the
+user wants to deploy SGX applications both on the host and in guests
+on the same machine, the user should reserve enough EPC (by subtracting
+the total virtual EPC size of all SGX VMs from the physical EPC size)
+for host SGX applications so they can run with acceptable performance.
diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
index 91d3dc7..9c16567 100644
--- a/arch/x86/kernel/cpu/sgx/Makefile
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -3,3 +3,4 @@ obj-y += \
encl.o \
ioctl.o \
main.o
+obj-$(CONFIG_X86_SGX_KVM)  += virt.o
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 4aa40c6..4854f39 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ 

[tip: x86/sgx] x86/sgx: Add SGX_CHILD_PRESENT hardware error code

2021-04-07 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 231d3dbdda192e3b3c7b79f4c3b0616f6c7f31b7
Gitweb:
https://git.kernel.org/tip/231d3dbdda192e3b3c7b79f4c3b0616f6c7f31b7
Author: Sean Christopherson 
AuthorDate: Fri, 19 Mar 2021 20:22:20 +13:00
Committer: Borislav Petkov 
CommitterDate: Fri, 26 Mar 2021 22:51:36 +01:00

x86/sgx: Add SGX_CHILD_PRESENT hardware error code

The SGX driver can accurately track how enclave pages are used.  This
enables the SECS to be specifically targeted and EREMOVE'd only after
all child pages have been EREMOVE'd.  This ensures that the SGX driver
will never encounter SGX_CHILD_PRESENT in normal operation.

Virtual EPC is different.  The host does not track how EPC pages are
used by the guest, so it cannot guarantee EREMOVE success.  It might,
for instance, encounter a SECS with a non-zero child count.

Add a definition of SGX_CHILD_PRESENT.  It will be used exclusively by
the SGX virtualization driver to handle recoverable EREMOVE errors when
sanitizing EPC pages after they are freed.

Signed-off-by: Sean Christopherson 
Signed-off-by: Kai Huang 
Signed-off-by: Borislav Petkov 
Acked-by: Dave Hansen 
Acked-by: Jarkko Sakkinen 
Link: 
https://lkml.kernel.org/r/050b198e882afde7e6eba8e6a0d4da39161dbb5a.1616136308.git.kai.hu...@intel.com
---
 arch/x86/kernel/cpu/sgx/arch.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/cpu/sgx/arch.h b/arch/x86/kernel/cpu/sgx/arch.h
index dd7602c..abf99bb 100644
--- a/arch/x86/kernel/cpu/sgx/arch.h
+++ b/arch/x86/kernel/cpu/sgx/arch.h
@@ -26,12 +26,14 @@
  * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
  * %SGX_NOT_TRACKED:		Previous ETRACK's shootdown sequence has not
  *				been completed yet.
+ * %SGX_CHILD_PRESENT:		SECS has child pages present in the EPC.
  * %SGX_INVALID_EINITTOKEN:	EINITTOKEN is invalid and enclave signer's
  *				public key does not match IA32_SGXLEPUBKEYHASH.
  * %SGX_UNMASKED_EVENT:	An unmasked event, e.g. INTR, was received
  */
 enum sgx_return_code {
SGX_NOT_TRACKED = 11,
+   SGX_CHILD_PRESENT   = 13,
SGX_INVALID_EINITTOKEN  = 16,
SGX_UNMASKED_EVENT  = 128,
 };
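
For illustration, the virtual EPC teardown path can handle this error
code roughly as follows; a sketch modeled on the vEPC patch in this
series, not the verbatim kernel code:

	static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
	{
		int ret = __eremove(sgx_get_epc_virt_addr(epc_page));

		if (ret) {
			/*
			 * Only SGX_CHILD_PRESENT is expected: a SECS cannot
			 * be EREMOVE'd until all of its child pages are gone,
			 * so leave the page in place and retry later.
			 */
			WARN_ONCE(ret != SGX_CHILD_PRESENT,
				  "EREMOVE returned %d (0x%x)", ret, ret);
			return ret;
		}

		sgx_free_epc_page(epc_page);
		return 0;
	}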


[tip: x86/sgx] x86/cpu/intel: Allow SGX virtualization without Launch Control support

2021-04-07 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 332bfc7becf479de8a55864cc5ed0024baea28aa
Gitweb:
https://git.kernel.org/tip/332bfc7becf479de8a55864cc5ed0024baea28aa
Author: Sean Christopherson 
AuthorDate: Fri, 19 Mar 2021 20:22:58 +13:00
Committer: Borislav Petkov 
CommitterDate: Tue, 06 Apr 2021 09:43:41 +02:00

x86/cpu/intel: Allow SGX virtualization without Launch Control support

The kernel will currently disable all SGX support if the hardware does
not support Launch Control.  Make it more permissive to allow SGX
virtualization on systems without Launch Control support.  This will
allow KVM to expose SGX to guests that have less-strict requirements on
the availability of flexible Launch Control.

Improve the error messages to distinguish among three cases.  There are
two cases where SGX support is completely disabled:
1) SGX has been disabled completely by the BIOS
2) SGX LC is locked by the BIOS.  Bare-metal support is disabled because
   of LC unavailability.  SGX virtualization is unavailable (because of
   Kconfig).
One where it is partially available:
3) SGX LC is locked by the BIOS.  Bare-metal support is disabled because
   of LC unavailability.  SGX virtualization is supported.

Signed-off-by: Sean Christopherson 
Co-developed-by: Kai Huang 
Signed-off-by: Kai Huang 
Signed-off-by: Borislav Petkov 
Acked-by: Jarkko Sakkinen 
Acked-by: Dave Hansen 
Link: 
https://lkml.kernel.org/r/b3329777076509b3b601550da288c8f3c406a865.1616136308.git.kai.hu...@intel.com
---
 arch/x86/kernel/cpu/feat_ctl.c | 59 -
 1 file changed, 44 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
index 27533a6..da696eb 100644
--- a/arch/x86/kernel/cpu/feat_ctl.c
+++ b/arch/x86/kernel/cpu/feat_ctl.c
@@ -104,8 +104,9 @@ early_param("nosgx", nosgx);
 
 void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
 {
+   bool enable_sgx_kvm = false, enable_sgx_driver = false;
bool tboot = tboot_enabled();
-   bool enable_sgx;
+   bool enable_vmx;
u64 msr;
 
	if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr)) {
@@ -114,13 +115,19 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
return;
}
 
-   /*
-* Enable SGX if and only if the kernel supports SGX and Launch Control
-* is supported, i.e. disable SGX if the LE hash MSRs can't be written.
-*/
-   enable_sgx = cpu_has(c, X86_FEATURE_SGX) &&
-cpu_has(c, X86_FEATURE_SGX_LC) &&
-IS_ENABLED(CONFIG_X86_SGX);
+   enable_vmx = cpu_has(c, X86_FEATURE_VMX) &&
+IS_ENABLED(CONFIG_KVM_INTEL);
+
+   if (cpu_has(c, X86_FEATURE_SGX) && IS_ENABLED(CONFIG_X86_SGX)) {
+   /*
+* Separate out SGX driver enabling from KVM.  This allows KVM
+* guests to use SGX even if the kernel SGX driver refuses to
+* use it.  This happens if flexible Launch Control is not
+* available.
+*/
+   enable_sgx_driver = cpu_has(c, X86_FEATURE_SGX_LC);
+   enable_sgx_kvm = enable_vmx && IS_ENABLED(CONFIG_X86_SGX_KVM);
+   }
 
if (msr & FEAT_CTL_LOCKED)
goto update_caps;
@@ -136,15 +143,18 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
 * i.e. KVM is enabled, to avoid unnecessarily adding an attack vector
 * for the kernel, e.g. using VMX to hide malicious code.
 */
-   if (cpu_has(c, X86_FEATURE_VMX) && IS_ENABLED(CONFIG_KVM_INTEL)) {
+   if (enable_vmx) {
msr |= FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
 
if (tboot)
msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX;
}
 
-   if (enable_sgx)
-   msr |= FEAT_CTL_SGX_ENABLED | FEAT_CTL_SGX_LC_ENABLED;
+   if (enable_sgx_kvm || enable_sgx_driver) {
+   msr |= FEAT_CTL_SGX_ENABLED;
+   if (enable_sgx_driver)
+   msr |= FEAT_CTL_SGX_LC_ENABLED;
+   }
 
wrmsrl(MSR_IA32_FEAT_CTL, msr);
 
@@ -167,10 +177,29 @@ update_caps:
}
 
 update_sgx:
-   if (!(msr & FEAT_CTL_SGX_ENABLED) ||
-   !(msr & FEAT_CTL_SGX_LC_ENABLED) || !enable_sgx) {
-   if (enable_sgx)
-   pr_err_once("SGX disabled by BIOS\n");
+   if (!(msr & FEAT_CTL_SGX_ENABLED)) {
+   if (enable_sgx_kvm || enable_sgx_driver)
+   pr_err_once("SGX disabled by BIOS.\n");
clear_cpu_cap(c, X86_FEATURE_SGX);
+   return;
+   }
+
+   /*
+* VMX feature bit may be cleared due to being disabled in BIOS,
+* in which case SGX virtualization cannot be supported either.
+*/
+   if (!cpu_has(c, X86_FEATURE_VMX) && enable_sgx_kvm) {
+		pr_err_once("SGX virtualization disabled due to lack of VMX.\n");
+   

[tip: x86/sgx] x86/sgx: Expose SGX architectural definitions to the kernel

2021-04-07 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 8ca52cc38dc8fdcbdbd0c23eafb19db5e5f5c8d0
Gitweb:
https://git.kernel.org/tip/8ca52cc38dc8fdcbdbd0c23eafb19db5e5f5c8d0
Author: Sean Christopherson 
AuthorDate: Fri, 19 Mar 2021 20:23:03 +13:00
Committer: Borislav Petkov 
CommitterDate: Tue, 06 Apr 2021 09:43:41 +02:00

x86/sgx: Expose SGX architectural definitions to the kernel

Expose SGX architectural structures, as KVM will use many of the
architectural constants and structs to virtualize SGX.

Name the new header file asm/sgx.h, rather than asm/sgx_arch.h, to have
a single header providing SGX facilities to share with other kernel
components. Also update MAINTAINERS to include asm/sgx.h.

Signed-off-by: Sean Christopherson 
Co-developed-by: Kai Huang 
Signed-off-by: Kai Huang 
Signed-off-by: Borislav Petkov 
Acked-by: Jarkko Sakkinen 
Acked-by: Dave Hansen 
Link: 
https://lkml.kernel.org/r/6bf47acd91ab4d709e66ad1692c7803e4c9063a0.1616136308.git.kai.hu...@intel.com
---
 MAINTAINERS   |   1 +-
 arch/x86/include/asm/sgx.h| 350 +-
 arch/x86/kernel/cpu/sgx/arch.h| 340 +
 arch/x86/kernel/cpu/sgx/encl.c|   2 +-
 arch/x86/kernel/cpu/sgx/sgx.h |   2 +-
 tools/testing/selftests/sgx/defines.h |   2 +-
 6 files changed, 354 insertions(+), 343 deletions(-)
 create mode 100644 arch/x86/include/asm/sgx.h
 delete mode 100644 arch/x86/kernel/cpu/sgx/arch.h

diff --git a/MAINTAINERS b/MAINTAINERS
index aa84121..0cb606a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9274,6 +9274,7 @@ Q:	https://patchwork.kernel.org/project/intel-sgx/list/
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/sgx
 F: Documentation/x86/sgx.rst
 F: arch/x86/entry/vdso/vsgx.S
+F: arch/x86/include/asm/sgx.h
 F: arch/x86/include/uapi/asm/sgx.h
 F: arch/x86/kernel/cpu/sgx/*
 F: tools/testing/selftests/sgx/*
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
new file mode 100644
index 000..14bb5f7
--- /dev/null
+++ b/arch/x86/include/asm/sgx.h
@@ -0,0 +1,350 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+ * Copyright(c) 2016-20 Intel Corporation.
+ *
+ * Intel Software Guard Extensions (SGX) support.
+ */
+#ifndef _ASM_X86_SGX_H
+#define _ASM_X86_SGX_H
+
+#include 
+#include 
+
+/*
+ * This file contains both data structures defined by the SGX architecture and
+ * Linux-defined software data structures and functions.  The two should not be
+ * mixed together for better readability.  The architectural definitions come first.
+ */
+
+/* The SGX specific CPUID function. */
+#define SGX_CPUID  0x12
+/* EPC enumeration. */
+#define SGX_CPUID_EPC  2
+/* An invalid EPC section, i.e. the end marker. */
+#define SGX_CPUID_EPC_INVALID  0x0
+/* A valid EPC section. */
+#define SGX_CPUID_EPC_SECTION  0x1
+/* The bitmask for the EPC section type. */
+#define SGX_CPUID_EPC_MASK GENMASK(3, 0)
+
+/**
+ * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
+ * %SGX_NOT_TRACKED:		Previous ETRACK's shootdown sequence has not
+ *				been completed yet.
+ * %SGX_CHILD_PRESENT:		SECS has child pages present in the EPC.
+ * %SGX_INVALID_EINITTOKEN:	EINITTOKEN is invalid and enclave signer's
+ *				public key does not match IA32_SGXLEPUBKEYHASH.
+ * %SGX_UNMASKED_EVENT:	An unmasked event, e.g. INTR, was received
+ */
+enum sgx_return_code {
+   SGX_NOT_TRACKED = 11,
+   SGX_CHILD_PRESENT   = 13,
+   SGX_INVALID_EINITTOKEN  = 16,
+   SGX_UNMASKED_EVENT  = 128,
+};
+
+/* The modulus size for 3072-bit RSA keys. */
+#define SGX_MODULUS_SIZE 384
+
+/**
+ * enum sgx_miscselect - additional information to an SSA frame
+ * %SGX_MISC_EXINFO:   Report #PF or #GP to the SSA frame.
+ *
+ * Save State Area (SSA) is a stack inside the enclave used to store processor
+ * state when an exception or interrupt occurs. This enum defines additional
+ * information stored to an SSA frame.
+ */
+enum sgx_miscselect {
+   SGX_MISC_EXINFO = BIT(0),
+};
+
+#define SGX_MISC_RESERVED_MASK GENMASK_ULL(63, 1)
+
+#define SGX_SSA_GPRS_SIZE  184
+#define SGX_SSA_MISC_EXINFO_SIZE   16
+
+/**
+ * enum sgx_attributes - the attributes field in &struct sgx_secs
+ * %SGX_ATTR_INIT:		Enclave can be entered (is initialized).
+ * %SGX_ATTR_DEBUG:		Allow ENCLS(EDBGRD) and ENCLS(EDBGWR).
+ * %SGX_ATTR_MODE64BIT:	Tell that this is a 64-bit enclave.
+ * %SGX_ATTR_PROVISIONKEY:	Allow to use provisioning keys for remote
+ *				attestation.
+ * %SGX_ATTR_KSS:		Allow to use key separation and sharing (KSS).
+ * %SGX_ATTR_EINITTOKENKEY:	Allow to use token signing key that is used to
+ * 

[tip: x86/sgx] x86/sgx: Add SGX2 ENCLS leaf definitions (EAUG, EMODPR and EMODT)

2021-04-07 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 32ddda8e445df3de477db14d386fb3518042224a
Gitweb:
https://git.kernel.org/tip/32ddda8e445df3de477db14d386fb3518042224a
Author: Sean Christopherson 
AuthorDate: Fri, 19 Mar 2021 20:23:05 +13:00
Committer: Borislav Petkov 
CommitterDate: Tue, 06 Apr 2021 09:43:42 +02:00

x86/sgx: Add SGX2 ENCLS leaf definitions (EAUG, EMODPR and EMODT)

Define the ENCLS leafs that are available with SGX2, also referred to as
Enclave Dynamic Memory Management (EDMM).  The leafs will be used by KVM
to conditionally expose SGX2 capabilities to guests.

Signed-off-by: Sean Christopherson 
Signed-off-by: Kai Huang 
Signed-off-by: Borislav Petkov 
Acked-by: Jarkko Sakkinen 
Acked-by: Dave Hansen 
Link: 
https://lkml.kernel.org/r/5f0970c251ebcc6d5add132f0d750cc753b7060f.1616136308.git.kai.hu...@intel.com
---
 arch/x86/include/asm/sgx.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 34f4423..3b025af 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -40,6 +40,9 @@ enum sgx_encls_function {
EPA = 0x0A,
EWB = 0x0B,
ETRACK  = 0x0C,
+   EAUG= 0x0D,
+   EMODPR  = 0x0E,
+   EMODT   = 0x0F,
 };
 
 /**


[tip: x86/sgx] x86/sgx: Add encls_faulted() helper

2021-04-07 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: a67136b458e5e63822b19c35794451122fe2bf3e
Gitweb:
https://git.kernel.org/tip/a67136b458e5e63822b19c35794451122fe2bf3e
Author: Sean Christopherson 
AuthorDate: Fri, 19 Mar 2021 20:23:06 +13:00
Committer: Borislav Petkov 
CommitterDate: Tue, 06 Apr 2021 09:43:42 +02:00

x86/sgx: Add encls_faulted() helper

Add a helper to extract the fault indicator from an encoded ENCLS return
value.  SGX virtualization will also need to detect ENCLS faults.

Signed-off-by: Sean Christopherson 
Signed-off-by: Kai Huang 
Signed-off-by: Borislav Petkov 
Acked-by: Jarkko Sakkinen 
Acked-by: Dave Hansen 
Link: 
https://lkml.kernel.org/r/c1f955898110de2f669da536fc6cf62e003dff88.1616136308.git.kai.hu...@intel.com
---
 arch/x86/kernel/cpu/sgx/encls.h | 15 ++-
 arch/x86/kernel/cpu/sgx/ioctl.c |  2 +-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
index be5c496..9b20484 100644
--- a/arch/x86/kernel/cpu/sgx/encls.h
+++ b/arch/x86/kernel/cpu/sgx/encls.h
@@ -40,6 +40,19 @@
} while (0);  \
 }
 
+/**
+ * encls_faulted() - Check if an ENCLS leaf faulted given an error code
+ * @ret:	the return value of an ENCLS leaf function call
+ *
+ * Return:
+ * - true:	ENCLS leaf faulted.
+ * - false:	Otherwise.
+ */
+static inline bool encls_faulted(int ret)
+{
+   return ret & ENCLS_FAULT_FLAG;
+}
+
 /**
  * encls_failed() - Check if an ENCLS function failed
  * @ret:   the return value of an ENCLS function call
@@ -50,7 +63,7 @@
  */
 static inline bool encls_failed(int ret)
 {
-   if (ret & ENCLS_FAULT_FLAG)
+   if (encls_faulted(ret))
return ENCLS_TRAPNR(ret) != X86_TRAP_PF;
 
return !!ret;
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 354e309..11e3f96 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -568,7 +568,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
}
}
 
-   if (ret & ENCLS_FAULT_FLAG) {
+   if (encls_faulted(ret)) {
if (encls_failed(ret))
ENCLS_WARN(ret, "EINIT");
 


[tip: x86/sgx] x86/sgx: Move ENCLS leaf definitions to sgx.h

2021-04-07 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 9c55c78a73ce6e62a1d46ba6e4f242c23c29b812
Gitweb:
https://git.kernel.org/tip/9c55c78a73ce6e62a1d46ba6e4f242c23c29b812
Author: Sean Christopherson 
AuthorDate: Fri, 19 Mar 2021 20:23:04 +13:00
Committer: Borislav Petkov 
CommitterDate: Tue, 06 Apr 2021 09:43:41 +02:00

x86/sgx: Move ENCLS leaf definitions to sgx.h

Move the ENCLS leaf definitions to sgx.h so that they can be used by
KVM.

Signed-off-by: Sean Christopherson 
Signed-off-by: Kai Huang 
Signed-off-by: Borislav Petkov 
Acked-by: Jarkko Sakkinen 
Acked-by: Dave Hansen 
Link: 
https://lkml.kernel.org/r/2e6cd7c5c1ced620cfcd292c3c6c382827fde6b2.1616136308.git.kai.hu...@intel.com
---
 arch/x86/include/asm/sgx.h  | 15 +++
 arch/x86/kernel/cpu/sgx/encls.h | 15 ---
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 14bb5f7..34f4423 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -27,6 +27,21 @@
 /* The bitmask for the EPC section type. */
 #define SGX_CPUID_EPC_MASK GENMASK(3, 0)
 
+enum sgx_encls_function {
+   ECREATE = 0x00,
+   EADD= 0x01,
+   EINIT   = 0x02,
+   EREMOVE = 0x03,
+   EDGBRD  = 0x04,
+   EDGBWR  = 0x05,
+   EEXTEND = 0x06,
+   ELDU= 0x08,
+   EBLOCK  = 0x09,
+   EPA = 0x0A,
+   EWB = 0x0B,
+   ETRACK  = 0x0C,
+};
+
 /**
  * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
  * %SGX_NOT_TRACKED:   Previous ETRACK's shootdown sequence has not
diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
index 443188f..be5c496 100644
--- a/arch/x86/kernel/cpu/sgx/encls.h
+++ b/arch/x86/kernel/cpu/sgx/encls.h
@@ -11,21 +11,6 @@
 #include 
 #include "sgx.h"
 
-enum sgx_encls_function {
-   ECREATE = 0x00,
-   EADD= 0x01,
-   EINIT   = 0x02,
-   EREMOVE = 0x03,
-   EDGBRD  = 0x04,
-   EDGBWR  = 0x05,
-   EEXTEND = 0x06,
-   ELDU= 0x08,
-   EBLOCK  = 0x09,
-   EPA = 0x0A,
-   EWB = 0x0B,
-   ETRACK  = 0x0C,
-};
-
 /**
  * ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr
  *


[tip: x86/sgx] x86/sgx: Add helpers to expose ECREATE and EINIT to KVM

2021-04-07 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: d155030b1e7c0e448aab22a803f7a71ea2e117d7
Gitweb:
https://git.kernel.org/tip/d155030b1e7c0e448aab22a803f7a71ea2e117d7
Author: Sean Christopherson 
AuthorDate: Fri, 19 Mar 2021 20:23:08 +13:00
Committer: Borislav Petkov 
CommitterDate: Tue, 06 Apr 2021 19:18:27 +02:00

x86/sgx: Add helpers to expose ECREATE and EINIT to KVM

The host kernel must intercept ECREATE to impose policies on guests, and
intercept EINIT to be able to write the guest's virtual SGX_LEPUBKEYHASH
MSR values to hardware before running the guest's EINIT so it can run
correctly according to hardware behavior.

Provide wrappers around __ecreate() and __einit() to hide the ugliness
of overloading the ENCLS return value to encode multiple error formats
in a single int.  KVM will trap-and-execute ECREATE and EINIT as part
of SGX virtualization, and reflect ENCLS execution result to guest by
setting up guest's GPRs, or on an exception, injecting the correct fault
based on return value of __ecreate() and __einit().

Use host userspace addresses (provided by KVM based on guest physical
address of ENCLS parameters) to execute ENCLS/EINIT when possible.
Accesses to both EPC and memory originating from ENCLS are subject to
segmentation and paging mechanisms.  It's also possible to generate
kernel mappings for ENCLS parameters by resolving PFN but using
__uaccess_xx() is simpler.

 [ bp: Return early if the __user memory accesses fail, use
   cpu_feature_enabled(). ]

Signed-off-by: Sean Christopherson 
Signed-off-by: Kai Huang 
Signed-off-by: Borislav Petkov 
Acked-by: Jarkko Sakkinen 
Link: 
https://lkml.kernel.org/r/20e09daf559aa5e9e680a0b4b5fba940f1bad86e.1616136308.git.kai.hu...@intel.com
---
 arch/x86/include/asm/sgx.h |   7 ++-
 arch/x86/kernel/cpu/sgx/virt.c | 117 -
 2 files changed, 124 insertions(+)

diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 3b025af..954042e 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -365,4 +365,11 @@ struct sgx_sigstruct {
  * comment!
  */
 
+#ifdef CONFIG_X86_SGX_KVM
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+int *trapnr);
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+  void __user *secs, u64 *lepubkeyhash, int *trapnr);
+#endif
+
 #endif /* _ASM_X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index 259cc46..7d221ea 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -257,3 +257,120 @@ int __init sgx_vepc_init(void)
 
	return misc_register(&sgx_vepc_dev);
 }
+
+/**
+ * sgx_virt_ecreate() - Run ECREATE on behalf of guest
+ * @pageinfo:  Pointer to PAGEINFO structure
+ * @secs:  Userspace pointer to SECS page
+ * @trapnr:trap number injected to guest in case of ECREATE error
+ *
+ * Run ECREATE on behalf of guest after KVM traps ECREATE for the purpose
+ * of enforcing policies of guest's enclaves, and return the trap number
+ * which should be injected to guest in case of any ECREATE error.
+ *
+ * Return:
+ * -  0:   ECREATE was successful.
+ * - <0:   on error.
+ */
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+int *trapnr)
+{
+   int ret;
+
+   /*
+* @secs is an untrusted, userspace-provided address.  It comes from
+* KVM and is assumed to be a valid pointer which points somewhere in
+* userspace.  This can fault and call SGX or other fault handlers when
+* userspace mapping @secs doesn't exist.
+*
+* Add a WARN() to make sure @secs is already valid userspace pointer
+* from caller (KVM), who should already have handled invalid pointer
+* case (for instance, made by malicious guest).  All other checks,
+* such as alignment of @secs, are deferred to ENCLS itself.
+*/
+   if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE)))
+   return -EINVAL;
+
+   __uaccess_begin();
+   ret = __ecreate(pageinfo, (void *)secs);
+   __uaccess_end();
+
+   if (encls_faulted(ret)) {
+   *trapnr = ENCLS_TRAPNR(ret);
+   return -EFAULT;
+   }
+
+   /* ECREATE doesn't return an error code, it faults or succeeds. */
+   WARN_ON_ONCE(ret);
+   return 0;
+}
+EXPORT_SYMBOL_GPL(sgx_virt_ecreate);
+
+static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
+   void __user *secs)
+{
+   int ret;
+
+   /*
+* Make sure all userspace pointers from caller (KVM) are valid.
+* All other checks deferred to ENCLS itself.  Also see comment
+* for @secs in sgx_virt_ecreate().
+*/
+#define SGX_EINITTOKEN_SIZE	304
+   if (WARN_ON_ONCE(!access_ok(sigstruct, sizeof(struct sgx_sigstruct)) ||
+  
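
The archived message is truncated here; based on the upstream commit, the
remainder of __sgx_virt_einit() plausibly continues along these lines (a
reconstruction, not the verbatim patch):

			 !access_ok(token, SGX_EINITTOKEN_SIZE) ||
			 !access_ok(secs, PAGE_SIZE)))
		return -EINVAL;

	__uaccess_begin();
	ret = __einit((void *)sigstruct, (void *)token, (void *)secs);
	__uaccess_end();

	return ret;
}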

[tip: perf/urgent] x86/perf: Use RET0 as default for guest_get_msrs to handle "no PMU" case

2021-03-11 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the perf/urgent branch of tip:

Commit-ID: c8e2fe13d1d1f3a02842b7b909d4e4846a4b6a2c
Gitweb:
https://git.kernel.org/tip/c8e2fe13d1d1f3a02842b7b909d4e4846a4b6a2c
Author: Sean Christopherson 
AuthorDate: Tue, 09 Mar 2021 09:10:19 -08:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 10 Mar 2021 16:45:09 +01:00

x86/perf: Use RET0 as default for guest_get_msrs to handle "no PMU" case

Initialize x86_pmu.guest_get_msrs to return 0/NULL to handle the "nop"
case.  Patching in perf_guest_get_msrs_nop() during setup does not work
if there is no PMU, as setup bails before updating the static calls,
leaving x86_pmu.guest_get_msrs NULL and thus a complete nop.  Ultimately,
this causes VMX abort on VM-Exit due to KVM putting random garbage from
the stack into the MSR load list.

Add a comment in KVM to note that nr_msrs is valid if and only if the
return value is non-NULL.
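
Conceptually, the RET0 default makes an unpatched call behave like the
following hedged sketch (illustrative; not the actual static-call
internals):

	/*
	 * Illustrative only: __static_call_return0 returns (long)0, which
	 * the caller sees as a NULL pointer.  *nr is never written, hence
	 * nr_msrs is meaningful only when the return value is non-NULL.
	 */
	static struct perf_guest_switch_msr *guest_get_msrs_ret0(int *nr)
	{
		return NULL;	/* *nr deliberately left untouched */
	}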

Fixes: abd562df94d1 ("x86/perf: Use static_call for x86_pmu.guest_get_msrs")
Reported-by: Dmitry Vyukov 
Reported-by: syzbot+cce9ef2dd25246f81...@syzkaller.appspotmail.com
Suggested-by: Peter Zijlstra 
Signed-off-by: Sean Christopherson 
Signed-off-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210309171019.1125243-1-sea...@google.com
---
 arch/x86/events/core.c | 15 ++-
 arch/x86/kvm/vmx/vmx.c |  2 +-
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6ddeed3..18df171 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -81,7 +81,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
 DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs,   *x86_pmu.drain_pebs);
 DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
 
-DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs,  *x86_pmu.guest_get_msrs);
+/*
+ * This one is magic, it will get called even when PMU init fails (because
+ * there is no PMU), in which case it should simply return NULL.
+ */
+DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
 
 u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
@@ -1944,13 +1948,6 @@ static void _x86_pmu_read(struct perf_event *event)
x86_perf_event_update(event);
 }
 
-static inline struct perf_guest_switch_msr *
-perf_guest_get_msrs_nop(int *nr)
-{
-   *nr = 0;
-   return NULL;
-}
-
 static int __init init_hw_perf_events(void)
 {
struct x86_pmu_quirk *quirk;
@@ -2025,7 +2022,7 @@ static int __init init_hw_perf_events(void)
x86_pmu.read = _x86_pmu_read;
 
if (!x86_pmu.guest_get_msrs)
-   x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop;
+   x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
 
x86_pmu_static_call_update();
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 50810d4..32cf828 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6580,8 +6580,8 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
int i, nr_msrs;
struct perf_guest_switch_msr *msrs;
 
+	/* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */
msrs = perf_guest_get_msrs(_msrs);
-
if (!msrs)
return;
 


[tip: x86/cpu] x86/cpufeatures: Assign dedicated feature word for CPUID_0x8000001F[EAX]

2021-01-28 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: fb35d30fe5b06cc2f0405da8fbe0be5330d1
Gitweb:
https://git.kernel.org/tip/fb35d30fe5b06cc2f0405da8fbe0be5330d1
Author: Sean Christopherson 
AuthorDate: Fri, 22 Jan 2021 12:40:46 -08:00
Committer: Borislav Petkov 
CommitterDate: Thu, 28 Jan 2021 17:41:24 +01:00

x86/cpufeatures: Assign dedicated feature word for CPUID_0x8000001F[EAX]

Collect the scattered SME/SEV related feature flags into a dedicated
word.  There are now five recognized features in CPUID.0x8000001F.EAX,
with at least one more on the horizon (SEV-SNP).  Using a dedicated word
allows KVM to use its automagic CPUID adjustment logic when reporting
the set of supported features to userspace.

No functional change intended.

Signed-off-by: Sean Christopherson 
Signed-off-by: Borislav Petkov 
Reviewed-by: Brijesh Singh 
Link: https://lkml.kernel.org/r/20210122204047.2860075-2-sea...@google.com
---
 arch/x86/include/asm/cpufeature.h  |  7 +--
 arch/x86/include/asm/cpufeatures.h | 17 +++--
 arch/x86/include/asm/disabled-features.h   |  3 ++-
 arch/x86/include/asm/required-features.h   |  3 ++-
 arch/x86/kernel/cpu/common.c   |  3 +++-
 arch/x86/kernel/cpu/scattered.c|  5 +-
 tools/arch/x86/include/asm/disabled-features.h |  3 ++-
 tools/arch/x86/include/asm/required-features.h |  3 ++-
 8 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 59bf91c..1728d4c 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -30,6 +30,7 @@ enum cpuid_leafs
CPUID_7_ECX,
CPUID_8000_0007_EBX,
CPUID_7_EDX,
+   CPUID_8000_001F_EAX,
 };
 
 #ifdef CONFIG_X86_FEATURE_NAMES
@@ -88,8 +89,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) ||\
   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) ||\
   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) ||\
+  CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) ||\
   REQUIRED_MASK_CHECK||\
-  BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+  BUILD_BUG_ON_ZERO(NCAPINTS != 20))
 
 #define DISABLED_MASK_BIT_SET(feature_bit) \
 ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK,  0, feature_bit) ||\
@@ -111,8 +113,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) ||\
   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) ||\
   CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) ||\
+  CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) ||\
   DISABLED_MASK_CHECK||\
-  BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+  BUILD_BUG_ON_ZERO(NCAPINTS != 20))
 
 #define cpu_has(c, bit)
\
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :  \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 84b8878..1feb6c0 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS			19	   /* N 32-bit words worth of info */
+#define NCAPINTS			20	   /* N 32-bit words worth of info */
 #define NBUGINTS			1	   /* N 32-bit bug flags */
 
 /*
@@ -96,7 +96,7 @@
 #define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_SME_COHERENT	( 3*32+17) /* "" AMD hardware-enforced cache coherency */
+/* FREE!				( 3*32+17) */
 #define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -201,7 +201,7 @@
 #define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
+/* FREE!				( 7*32+10) */
 #define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
 

[tip: x86/cleanups] x86/asm: Drop unused RDPID macro

2020-11-26 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/cleanups branch of tip:

Commit-ID: 8539d3f06710a9e91b9968fa736549d7c6b44206
Gitweb:
https://git.kernel.org/tip/8539d3f06710a9e91b9968fa736549d7c6b44206
Author: Sean Christopherson 
AuthorDate: Tue, 27 Oct 2020 14:45:32 -07:00
Committer: Borislav Petkov 
CommitterDate: Thu, 26 Nov 2020 12:58:56 +01:00

x86/asm: Drop unused RDPID macro

Drop the GAS-compatible RDPID macro. RDPID is unsafe in the kernel
because KVM loads the guest's TSC_AUX on VM-entry and may not restore
the host's value until the CPU returns to userspace.

See

  6a3ea3e68b8a ("x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM")

for details.

It can always be resurrected from git history, if needed.

 [ bp: Massage commit message. ]

Signed-off-by: Sean Christopherson 
Signed-off-by: Borislav Petkov 
Link: 
https://lkml.kernel.org/r/20201027214532.1792-1-sean.j.christopher...@intel.com
---
 arch/x86/include/asm/inst.h | 15 ---
 1 file changed, 15 deletions(-)

diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index bd7f024..438ccd4 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -143,21 +143,6 @@
.macro MODRM mod opd1 opd2
.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
.endm
-
-.macro RDPID opd
-   REG_TYPE rdpid_opd_type \opd
-   .if rdpid_opd_type == REG_TYPE_R64
-   R64_NUM rdpid_opd \opd
-   .else
-   R32_NUM rdpid_opd \opd
-   .endif
-   .byte 0xf3
-   .if rdpid_opd > 7
-   PFX_REX rdpid_opd 0
-   .endif
-   .byte 0x0f, 0xc7
-   MODRM 0xc0 rdpid_opd 0x7
-.endm
 #endif
 
 #endif


[tip: x86/sgx] x86/vdso: Implement a vDSO for Intel SGX enclave call

2020-11-18 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 84664369520170f48546c55cbc1f3fbde9b1e140
Gitweb:
https://git.kernel.org/tip/84664369520170f48546c55cbc1f3fbde9b1e140
Author: Sean Christopherson 
AuthorDate: Fri, 13 Nov 2020 00:01:30 +02:00
Committer: Borislav Petkov 
CommitterDate: Wed, 18 Nov 2020 18:02:50 +01:00

x86/vdso: Implement a vDSO for Intel SGX enclave call

Enclaves encounter exceptions for lots of reasons: everything from enclave
page faults to NULL pointer dereferences, to system calls that must be
“proxied” to the kernel from outside the enclave.

In addition to the code contained inside an enclave, there is also
supporting code outside the enclave called an “SGX runtime”, which is
virtually always implemented inside a shared library.  The runtime helps
build the enclave and handles things like *re*building the enclave if it
got destroyed by something like a suspend/resume cycle.

The rebuilding has traditionally been handled in SIGSEGV handlers,
registered by the library.  But, being process-wide, shared state, signal
handling and shared libraries do not mix well.

Introduce a vDSO function call that wraps the enclave entry functions
(the EENTER/ERESUME functions of the ENCLU instruction) and returns
information about any exceptions to the caller in the SGX runtime.

Instead of generating a signal, the kernel places exception information in
RDI, RSI and RDX. The kernel-provided userspace portion of the vDSO handler
will place this information in a user-provided buffer or trigger a
user-provided callback at the time of the exception.

The vDSO function calling convention uses the standard RDI, RSI, RDX,
RCX, R8 and R9 registers.  This makes it possible to declare the vDSO as
a C prototype, but other than that there is no specific support for the
SystemV ABI. Things like storing XSAVE state are the responsibility of
the enclave and the runtime.

 [ bp: Change vsgx.o build dependency to CONFIG_X86_SGX. ]

Suggested-by: Andy Lutomirski 
Signed-off-by: Sean Christopherson 
Co-developed-by: Cedric Xing 
Signed-off-by: Cedric Xing 
Co-developed-by: Jarkko Sakkinen 
Signed-off-by: Jarkko Sakkinen 
Signed-off-by: Borislav Petkov 
Tested-by: Jethro Beekman 
Link: https://lkml.kernel.org/r/20201112220135.165028-20-jar...@kernel.org
---
 arch/x86/entry/vdso/Makefile|   2 +-
 arch/x86/entry/vdso/vdso.lds.S  |   1 +-
 arch/x86/entry/vdso/vsgx.S  | 151 +++-
 arch/x86/include/asm/enclu.h|   9 ++-
 arch/x86/include/uapi/asm/sgx.h |  91 +++-
 5 files changed, 254 insertions(+)
 create mode 100644 arch/x86/entry/vdso/vsgx.S
 create mode 100644 arch/x86/include/asm/enclu.h

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 2ad757f..02e3e42 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -27,6 +27,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)   := y
 vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
 vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
 vobjs32-y += vdso32/vclock_gettime.o
+vobjs-$(CONFIG_X86_SGX)+= vsgx.o
 
 # files to link into kernel
 obj-y  += vma.o extable.o
@@ -98,6 +99,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
 CFLAGS_REMOVE_vclock_gettime.o = -pg
 CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
 CFLAGS_REMOVE_vgetcpu.o = -pg
+CFLAGS_REMOVE_vsgx.o = -pg
 
 #
 # X32 processes use x32 vDSO to access 64bit kernel data.
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index 36b644e..4bf4846 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -27,6 +27,7 @@ VERSION {
__vdso_time;
clock_getres;
__vdso_clock_getres;
+   __vdso_sgx_enter_enclave;
local: *;
};
 }
diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S
new file mode 100644
index 000..86a0e94
--- /dev/null
+++ b/arch/x86/entry/vdso/vsgx.S
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "extable.h"
+
+/* Relative to %rbp. */
+#define SGX_ENCLAVE_OFFSET_OF_RUN  16
+
+/* The offsets relative to struct sgx_enclave_run. */
+#define SGX_ENCLAVE_RUN_TCS			0
+#define SGX_ENCLAVE_RUN_LEAF   8
+#define SGX_ENCLAVE_RUN_EXCEPTION_VECTOR   12
+#define SGX_ENCLAVE_RUN_EXCEPTION_ERROR_CODE   14
+#define SGX_ENCLAVE_RUN_EXCEPTION_ADDR 16
+#define SGX_ENCLAVE_RUN_USER_HANDLER   24
+#define SGX_ENCLAVE_RUN_USER_DATA  32  /* not used */
+#define SGX_ENCLAVE_RUN_RESERVED_START 40
+#define SGX_ENCLAVE_RUN_RESERVED_END   256
+
+.code64
+.section .text, "ax"
+
+SYM_FUNC_START(__vdso_sgx_enter_enclave)
+   /* Prolog */
+   .cfi_startproc
+   push%rbp
+   .cfi_adjust_cfa_offset  8
+ 

[tip: x86/sgx] x86/vdso: Add support for exception fixup in vDSO functions

2020-11-18 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 8382c668ce4f367d902f4a340a1bfa9e46096ec1
Gitweb:
https://git.kernel.org/tip/8382c668ce4f367d902f4a340a1bfa9e46096ec1
Author: Sean Christopherson 
AuthorDate: Fri, 13 Nov 2020 00:01:27 +02:00
Committer: Borislav Petkov 
CommitterDate: Wed, 18 Nov 2020 18:02:50 +01:00

x86/vdso: Add support for exception fixup in vDSO functions

Signals are a horrid little mechanism.  They are especially nasty in
multi-threaded environments because signal state like handlers is global
across the entire process.  But, signals are basically the only way that
userspace can “gracefully” handle and recover from exceptions.

The kernel generally does not like exceptions to occur during execution.
But, exceptions are a fact of life and must be handled in some
circumstances.  The kernel handles them by keeping a list of individual
instructions which may cause exceptions.  Instead of truly handling the
exception and returning to the instruction that caused it, the kernel
instead restarts execution at a *different* instruction.  This makes it
obvious to that thread of execution that the exception occurred and lets
*that* code handle the exception instead of the handler.

This is not dissimilar to the try/catch exceptions mechanisms that some
programming languages have, but applied *very* surgically to single
instructions.  It effectively changes the visible architecture of the
instruction.

Problem
===

SGX generates a lot of signals, and the code to enter and exit enclaves and
muck with signal handling is truly horrid.  At the same time, an approach
like kernel exception fixup can not be easily applied to userspace
instructions because it changes the visible instruction architecture.

Solution


The vDSO is a special page of kernel-provided instructions that run in
userspace.  Any userspace calling into the vDSO knows that it is special.
This allows the kernel a place to legitimately rewrite the user/kernel
contract and change instruction behavior.

Add support for fixing up exceptions that occur while executing in the
vDSO.  This replaces what could traditionally only be done with signal
handling.

This new mechanism will be used to replace previously direct use of SGX
instructions by userspace.

Just introduce the vDSO infrastructure.  Later patches will actually
replace signal generation with vDSO exception fixup.

Suggested-by: Andy Lutomirski 
Signed-off-by: Sean Christopherson 
Signed-off-by: Jarkko Sakkinen 
Signed-off-by: Borislav Petkov 
Acked-by: Jethro Beekman 
Link: https://lkml.kernel.org/r/20201112220135.165028-17-jar...@kernel.org
---
 arch/x86/entry/vdso/Makefile  |  6 +--
 arch/x86/entry/vdso/extable.c | 46 -
 arch/x86/entry/vdso/extable.h | 28 +++-
 arch/x86/entry/vdso/vdso-layout.lds.S |  9 -
 arch/x86/entry/vdso/vdso2c.h  | 50 +-
 arch/x86/include/asm/vdso.h   |  5 +++-
 6 files changed, 139 insertions(+), 5 deletions(-)
 create mode 100644 arch/x86/entry/vdso/extable.c
 create mode 100644 arch/x86/entry/vdso/extable.h

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 2124374..2ad757f 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -29,7 +29,7 @@ vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
 vobjs32-y += vdso32/vclock_gettime.o
 
 # files to link into kernel
-obj-y  += vma.o
+obj-y  += vma.o extable.o
 KASAN_SANITIZE_vma.o   := y
 UBSAN_SANITIZE_vma.o   := y
 KCSAN_SANITIZE_vma.o   := y
@@ -128,8 +128,8 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE
 
 targets += vdsox32.lds $(vobjx32s-y)
 
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
+$(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table
+$(obj)/%.so: $(obj)/%.so.dbg
$(call if_changed,objcopy)
 
 $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
diff --git a/arch/x86/entry/vdso/extable.c b/arch/x86/entry/vdso/extable.c
new file mode 100644
index 000..afcf5b6
--- /dev/null
+++ b/arch/x86/entry/vdso/extable.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct vdso_exception_table_entry {
+   int insn, fixup;
+};
+
+bool fixup_vdso_exception(struct pt_regs *regs, int trapnr,
+ unsigned long error_code, unsigned long fault_addr)
+{
+   const struct vdso_image *image = current->mm->context.vdso_image;
+   const struct vdso_exception_table_entry *extable;
+   unsigned int nr_entries, i;
+   unsigned long base;
+
+   /*
+* Do not attempt to fixup #DB or #BP.  It's impossible to identify
+* whether or not a #DB/#BP originated from within an SGX enclave and
+* SGX enclaves are currently the only use case for vDSO fixup.

[tip: x86/sgx] mm: Add 'mprotect' hook to struct vm_operations_struct

2020-11-18 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 95bb7c42ac8a94ce3d0eb059ad64430390351ccb
Gitweb:
https://git.kernel.org/tip/95bb7c42ac8a94ce3d0eb059ad64430390351ccb
Author: Sean Christopherson 
AuthorDate: Fri, 13 Nov 2020 00:01:21 +02:00
Committer: Borislav Petkov 
CommitterDate: Tue, 17 Nov 2020 14:36:14 +01:00

mm: Add 'mprotect' hook to struct vm_operations_struct

Background
==

1. SGX enclave pages are populated with data by copying from normal memory
   via ioctl() (SGX_IOC_ENCLAVE_ADD_PAGES), which will be added later in
   this series.
2. It is desirable to be able to restrict those normal memory data sources.
   For instance, to ensure that the source data is executable before
   copying data to an executable enclave page.
3. Enclave page permissions are dynamic (just like normal permissions) and
   can be adjusted at runtime with mprotect().

This creates a problem because the original data source may have long since
vanished at the time when enclave page permissions are established (mmap()
or mprotect()).

The solution (elsewhere in this series) is to force enclave creators to
declare their paging permission *intent* up front to the ioctl().  This
intent can be immediately compared to the source data’s mapping and
rejected if necessary.

The “intent” is also stashed off for later comparison with enclave
PTEs. This ensures that any future mmap()/mprotect() operations
performed by the enclave creator or done on behalf of the enclave
can be compared with the earlier declared permissions.

Problem
===

There is an existing mmap() hook which allows SGX to perform this
permission comparison at mmap() time.  However, there is no corresponding
->mprotect() hook.

Solution


Add a vm_ops->mprotect() hook so that mprotect() operations which are
inconsistent with any page's stashed intent can be rejected by the driver.
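
As an illustration of how a driver consumes the hook, a hypothetical
sketch modeled on the SGX patches later in this series; sgx_encl_may_map()
is assumed here and is not defined by this commit:

	/* Reject mprotect() requests exceeding the declared intent. */
	static int sgx_vma_mprotect(struct vm_area_struct *vma,
				    unsigned long start, unsigned long end,
				    unsigned long newflags)
	{
		return sgx_encl_may_map(vma->vm_private_data, start, end,
					newflags);
	}

	static const struct vm_operations_struct sgx_vm_ops = {
		/* ... fault/open/close handlers ... */
		.mprotect = sgx_vma_mprotect,
	};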

Signed-off-by: Sean Christopherson 
Co-developed-by: Jarkko Sakkinen 
Signed-off-by: Jarkko Sakkinen 
Signed-off-by: Borislav Petkov 
Acked-by: Jethro Beekman 
Acked-by: Dave Hansen 
Acked-by: Mel Gorman 
Acked-by: Hillf Danton 
Cc: linux...@kvack.org
Link: https://lkml.kernel.org/r/20201112220135.165028-11-jar...@kernel.org
---
 include/linux/mm.h | 7 +++
 mm/mprotect.c  | 7 +++
 2 files changed, 14 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index db6ae4d..1813fa8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -559,6 +559,13 @@ struct vm_operations_struct {
void (*close)(struct vm_area_struct * area);
int (*split)(struct vm_area_struct * area, unsigned long addr);
int (*mremap)(struct vm_area_struct * area);
+	/*
+	 * Called by mprotect() to make driver-specific permission
+	 * checks before mprotect() is finalised.  The VMA must not
+	 * be modified.  Returns 0 if mprotect() can proceed.
+	 */
+   int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
+   unsigned long end, unsigned long newflags);
vm_fault_t (*fault)(struct vm_fault *vmf);
vm_fault_t (*huge_fault)(struct vm_fault *vmf,
enum page_entry_size pe_size);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 56c02be..ab70902 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -616,9 +616,16 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
tmp = vma->vm_end;
if (tmp > end)
tmp = end;
+
+   if (vma->vm_ops && vma->vm_ops->mprotect)
+   error = vma->vm_ops->mprotect(vma, nstart, tmp, newflags);
+   if (error)
+   goto out;
+
error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
if (error)
goto out;
+
nstart = tmp;
 
if (nstart < prev->vm_end)


[tip: x86/sgx] x86/cpu/intel: Detect SGX support

2020-11-18 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 224ab3527f89f69ae57dc53555826667ac46a3cc
Gitweb:
https://git.kernel.org/tip/224ab3527f89f69ae57dc53555826667ac46a3cc
Author:Sean Christopherson 
AuthorDate:Fri, 13 Nov 2020 00:01:18 +02:00
Committer: Borislav Petkov 
CommitterDate: Tue, 17 Nov 2020 14:36:13 +01:00

x86/cpu/intel: Detect SGX support

Kernel support for SGX is ultimately decided by the state of the launch
control bits in the feature control MSR (MSR_IA32_FEAT_CTL).  If the
hardware supports SGX, but neglects to support flexible launch control, the
kernel will not enable SGX.

Enable SGX at feature control MSR initialization and update the associated
X86_FEATURE flags accordingly.  Disable X86_FEATURE_SGX (and all
derivatives) if the kernel is not able to establish itself as the authority
over SGX Launch Control.

All checks are performed for each logical CPU (not just boot CPU) in order
to verify that MSR_IA32_FEATURE_CONTROL is correctly configured on all
CPUs. All SGX code in this series expects the same configuration from all
CPUs.

This differs from VMX where X86_FEATURE_VMX is intentionally cleared only
for the current CPU so that KVM can provide additional information if KVM
fails to load like which CPU doesn't support VMX.  There’s not much the
kernel or an administrator can do to fix the situation, so SGX neglects to
convey additional details about these kinds of failures if they occur.

Signed-off-by: Sean Christopherson 
Co-developed-by: Jarkko Sakkinen 
Signed-off-by: Jarkko Sakkinen 
Signed-off-by: Borislav Petkov 
Acked-by: Jethro Beekman 
Link: https://lkml.kernel.org/r/20201112220135.165028-8-jar...@kernel.org
---
 arch/x86/kernel/cpu/feat_ctl.c | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
index 29a3bed..d38e973 100644
--- a/arch/x86/kernel/cpu/feat_ctl.c
+++ b/arch/x86/kernel/cpu/feat_ctl.c
@@ -93,16 +93,32 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c)
 }
 #endif /* CONFIG_X86_VMX_FEATURE_NAMES */
 
+static void clear_sgx_caps(void)
+{
+   setup_clear_cpu_cap(X86_FEATURE_SGX);
+   setup_clear_cpu_cap(X86_FEATURE_SGX_LC);
+}
+
 void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
 {
bool tboot = tboot_enabled();
+   bool enable_sgx;
u64 msr;
 
if (rdmsrl_safe(MSR_IA32_FEAT_CTL, )) {
clear_cpu_cap(c, X86_FEATURE_VMX);
+   clear_sgx_caps();
return;
}
 
+   /*
+* Enable SGX if and only if the kernel supports SGX and Launch Control
+* is supported, i.e. disable SGX if the LE hash MSRs can't be written.
+*/
+   enable_sgx = cpu_has(c, X86_FEATURE_SGX) &&
+cpu_has(c, X86_FEATURE_SGX_LC) &&
+IS_ENABLED(CONFIG_X86_SGX);
+
if (msr & FEAT_CTL_LOCKED)
goto update_caps;
 
@@ -124,13 +140,16 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX;
}
 
+   if (enable_sgx)
+   msr |= FEAT_CTL_SGX_ENABLED | FEAT_CTL_SGX_LC_ENABLED;
+
wrmsrl(MSR_IA32_FEAT_CTL, msr);
 
 update_caps:
set_cpu_cap(c, X86_FEATURE_MSR_IA32_FEAT_CTL);
 
if (!cpu_has(c, X86_FEATURE_VMX))
-   return;
+   goto update_sgx;
 
if ( (tboot && !(msr & FEAT_CTL_VMX_ENABLED_INSIDE_SMX)) ||
(!tboot && !(msr & FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX))) {
@@ -143,4 +162,12 @@ update_caps:
init_vmx_capabilities(c);
 #endif
}
+
+update_sgx:
+   if (!(msr & FEAT_CTL_SGX_ENABLED) ||
+   !(msr & FEAT_CTL_SGX_LC_ENABLED) || !enable_sgx) {
+   if (enable_sgx)
+   pr_err_once("SGX disabled by BIOS\n");
+   clear_sgx_caps();
+   }
 }


[tip: x86/sgx] x86/mm: Signal SIGSEGV with PF_SGX

2020-11-18 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 74faeee06db81a06add0def6a394210c8fef0ab7
Gitweb:
https://git.kernel.org/tip/74faeee06db81a06add0def6a394210c8fef0ab7
Author:Sean Christopherson 
AuthorDate:Fri, 13 Nov 2020 00:01:17 +02:00
Committer: Borislav Petkov 
CommitterDate: Tue, 17 Nov 2020 14:36:13 +01:00

x86/mm: Signal SIGSEGV with PF_SGX

The x86 architecture has a set of page fault error codes.  These indicate
things like whether the fault occurred from a write, or whether it
originated in userspace.

The SGX hardware architecture has its own per-page memory management
metadata (EPCM) [*] and hardware which is separate from the normal x86 MMU.
The architecture has a new page fault error code: PF_SGX.  This new error
code bit is set whenever a page fault occurs as the result of the SGX MMU.

These faults occur for a variety of reasons.  For instance, an access
attempt to enclave memory from outside the enclave causes a PF_SGX fault.
PF_SGX would also be set for permission conflicts, such as if a write to an
enclave page occurs and the page is marked read-write in the x86 page
tables but is read-only in the EPCM.

These faults do not always indicate errors, though.  SGX pages are
encrypted with a key that is destroyed at hardware reset, including
suspend. Throwing a SIGSEGV allows user space software to react and recover
when these events occur.
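
For example, a user space runtime might handle that SIGSEGV by tearing
down and rebuilding the enclave (a minimal sketch; rebuild_enclave() is a
hypothetical, application-provided helper):

  #include <signal.h>
  #include <stdbool.h>
  #include <stdlib.h>

  extern bool rebuild_enclave(void *fault_addr);   /* user-supplied */

  static void segv_handler(int sig, siginfo_t *info, void *ucontext)
  {
      /* Rebuild on enclave loss; returning retries the faulting access. */
      if (!rebuild_enclave(info->si_addr))
          abort();
  }

  static void install_handler(void)
  {
      struct sigaction sa = {
          .sa_sigaction = segv_handler,
          .sa_flags     = SA_SIGINFO,
      };

      sigaction(SIGSEGV, &sa, NULL);
  }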

Include PF_SGX in the PF error codes list and throw SIGSEGV when it is
encountered.

[*] Intel SDM: 36.5.1 Enclave Page Cache Map (EPCM)

 [ bp: Add bit 15 to the comment above enum x86_pf_error_code too. ]

Signed-off-by: Sean Christopherson 
Signed-off-by: Jarkko Sakkinen 
Signed-off-by: Borislav Petkov 
Acked-by: Jethro Beekman 
Link: https://lkml.kernel.org/r/20201112220135.165028-7-jar...@kernel.org
---
 arch/x86/include/asm/trap_pf.h |  2 ++
 arch/x86/mm/fault.c| 12 
 2 files changed, 14 insertions(+)

diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h
index 305bc12..10b1de5 100644
--- a/arch/x86/include/asm/trap_pf.h
+++ b/arch/x86/include/asm/trap_pf.h
@@ -11,6 +11,7 @@
  *   bit 3 ==  1: use of reserved bit detected
  *   bit 4 ==  1: fault was an instruction fetch
  *   bit 5 ==  1: protection keys block access
+ *   bit 15 == 1: SGX MMU page-fault
  */
 enum x86_pf_error_code {
X86_PF_PROT =   1 << 0,
@@ -19,6 +20,7 @@ enum x86_pf_error_code {
X86_PF_RSVD =   1 << 3,
X86_PF_INSTR=   1 << 4,
X86_PF_PK   =   1 << 5,
+   X86_PF_SGX  =   1 << 15,
 };
 
 #endif /* _ASM_X86_TRAP_PF_H */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 82bf37a..9339fee 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1102,6 +1102,18 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
return 1;
 
/*
+* SGX hardware blocked the access.  This usually happens
+* when the enclave memory contents have been destroyed, like
+* after a suspend/resume cycle. In any case, the kernel can't
+* fix the cause of the fault.  Handle the fault as an access
+* error even in cases where no actual access violation
+* occurred.  This allows userspace to rebuild the enclave in
+* response to the signal.
+*/
+   if (unlikely(error_code & X86_PF_SGX))
+   return 1;
+
+   /*
 * Make sure to check the VMA so that we do not perform
 * faults just to hit a X86_PF_PK as soon as we fill in a
 * page.


[tip: x86/sgx] x86/{cpufeatures,msr}: Add Intel SGX Launch Control hardware bits

2020-11-18 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: d205e0f1426e0f99e2b4f387c49f2d8b66e129dd
Gitweb:
https://git.kernel.org/tip/d205e0f1426e0f99e2b4f387c49f2d8b66e129dd
Author:Sean Christopherson 
AuthorDate:Fri, 13 Nov 2020 00:01:15 +02:00
Committer: Borislav Petkov 
CommitterDate: Tue, 17 Nov 2020 14:36:13 +01:00

x86/{cpufeatures,msr}: Add Intel SGX Launch Control hardware bits

The SGX Launch Control hardware helps restrict which enclaves the
hardware will run.  Launch control is intended to restrict what software
can run with enclave protections, which helps protect the overall system
from bad enclaves.

For the kernel's purposes, there are effectively two modes in which the
launch control hardware can operate: rigid and flexible. In its rigid
mode, an entity other than the kernel has ultimate authority over which
enclaves can be run (firmware, Intel, etc...). In its flexible mode, the
kernel has ultimate authority over which enclaves can run.

Enable X86_FEATURE_SGX_LC to enumerate when the CPU supports SGX Launch
Control in general.

Add MSR_IA32_SGXLEPUBKEYHASH{0, 1, 2, 3}, which when combined contain a
SHA256 hash of a 3072-bit RSA public key. The hardware allows SGX enclaves
signed with this public key to initialize and run [*]. Enclaves not signed
with this key cannot initialize and run.
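
Viewed from software, the four MSRs simply hold the 256-bit digest split
into 64-bit chunks; conceptually (a sketch, assuming the kernel's usual
treatment of the digest as an array of four little-endian u64s):

  #include <stdint.h>
  #include <string.h>

  /* Split a 32-byte SHA-256 digest into the four SGXLEPUBKEYHASH values. */
  static void hash_to_msr_values(const uint8_t digest[32], uint64_t msr[4])
  {
      int i;

      for (i = 0; i < 4; i++)
          memcpy(&msr[i], digest + 8 * i, sizeof(msr[i]));
  }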

Add FEAT_CTL_SGX_LC_ENABLED, which informs whether the SGXLEPUBKEYHASH MSRs
can be written by the kernel.

If the MSRs do not exist or are read-only, the launch control hardware is
operating in rigid mode. Linux does not and will not support creating
enclaves when hardware is configured in rigid mode because it takes away
the authority for launch decisions from the kernel. Note, this does not
preclude KVM from virtualizing/exposing SGX to a KVM guest when launch
control hardware is operating in rigid mode.

[*] Intel SDM: 38.1.4 Intel SGX Launch Control Configuration

Signed-off-by: Sean Christopherson 
Co-developed-by: Jarkko Sakkinen 
Signed-off-by: Jarkko Sakkinen 
Signed-off-by: Borislav Petkov 
Acked-by: Jethro Beekman 
Link: https://lkml.kernel.org/r/20201112220135.165028-5-jar...@kernel.org
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/include/asm/msr-index.h   | 7 +++
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1181f5c..f5ef2d5 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -357,6 +357,7 @@
 #define X86_FEATURE_MOVDIRI    (16*32+27) /* MOVDIRI instruction */
 #define X86_FEATURE_MOVDIR64B  (16*32+28) /* MOVDIR64B instruction */
 #define X86_FEATURE_ENQCMD     (16*32+29) /* ENQCMD and ENQCMDS instructions */
+#define X86_FEATURE_SGX_LC     (16*32+30) /* Software Guard Extensions Launch Control */
 
 /* AMD-defined CPU features, CPUID level 0x8007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 258d555..d0c6cff 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -609,6 +609,7 @@
 #define FEAT_CTL_LOCKED                        BIT(0)
 #define FEAT_CTL_VMX_ENABLED_INSIDE_SMX        BIT(1)
 #define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX       BIT(2)
+#define FEAT_CTL_SGX_LC_ENABLED                BIT(17)
 #define FEAT_CTL_SGX_ENABLED   BIT(18)
 #define FEAT_CTL_LMCE_ENABLED  BIT(20)
 
@@ -629,6 +630,12 @@
 #define MSR_IA32_UCODE_WRITE   0x0079
 #define MSR_IA32_UCODE_REV 0x008b
 
+/* Intel SGX Launch Enclave Public Key Hash MSRs */
+#define MSR_IA32_SGXLEPUBKEYHASH0  0x008C
+#define MSR_IA32_SGXLEPUBKEYHASH1  0x008D
+#define MSR_IA32_SGXLEPUBKEYHASH2  0x008E
+#define MSR_IA32_SGXLEPUBKEYHASH3  0x008F
+
 #define MSR_IA32_SMM_MONITOR_CTL   0x009b
 #define MSR_IA32_SMBASE0x009e
 


[tip: x86/sgx] x86/cpufeatures: Add Intel SGX hardware bits

2020-11-18 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: e7b6385b01d8e9fb7a97887c3ea649abb95bb8c8
Gitweb:
https://git.kernel.org/tip/e7b6385b01d8e9fb7a97887c3ea649abb95bb8c8
Author:Sean Christopherson 
AuthorDate:Fri, 13 Nov 2020 00:01:14 +02:00
Committer: Borislav Petkov 
CommitterDate: Tue, 17 Nov 2020 14:36:13 +01:00

x86/cpufeatures: Add Intel SGX hardware bits

Populate X86_FEATURE_SGX feature from CPUID and tie it to the Kconfig
option with disabled-features.h.

IA32_FEATURE_CONTROL.SGX_ENABLE must be examined in addition to the CPUID
bits to enable full SGX support.  The BIOS must both set this bit and lock
IA32_FEATURE_CONTROL for SGX to be supported (Intel SDM section 36.7.1).
The setting or clearing of this bit has no impact on the CPUID bits above,
which is why it needs to be detected separately.
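
Condensed, the resulting check (which this series implements in
feat_ctl.c) amounts to the following -- a simplified sketch of the logic,
not the actual patch:

  /* SGX is usable only if enumerated *and* enabled+locked by the BIOS. */
  static bool sgx_usable(struct cpuinfo_x86 *c)
  {
      u64 msr;

      if (!cpu_has(c, X86_FEATURE_SGX))
          return false;

      if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr))
          return false;

      return (msr & FEAT_CTL_LOCKED) && (msr & FEAT_CTL_SGX_ENABLED);
  }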

Signed-off-by: Sean Christopherson 
Co-developed-by: Jarkko Sakkinen 
Signed-off-by: Jarkko Sakkinen 
Signed-off-by: Borislav Petkov 
Acked-by: Jethro Beekman 
Link: https://lkml.kernel.org/r/20201112220135.165028-4-jar...@kernel.org
---
 arch/x86/include/asm/cpufeatures.h   | 1 +
 arch/x86/include/asm/disabled-features.h | 8 +++-
 arch/x86/include/asm/msr-index.h | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index dad350d..1181f5c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -241,6 +241,7 @@
 /* Intel-defined CPU features, CPUID level 0x0007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE   ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
 #define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
+#define X86_FEATURE_SGX        ( 9*32+ 2) /* Software Guard Extensions */
 #define X86_FEATURE_BMI1       ( 9*32+ 3) /* 1st group bit manipulation extensions */
 #define X86_FEATURE_HLE        ( 9*32+ 4) /* Hardware Lock Elision */
 #define X86_FEATURE_AVX2   ( 9*32+ 5) /* AVX2 instructions */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 5861d34..7947cb1 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -62,6 +62,12 @@
 # define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
 #endif
 
+#ifdef CONFIG_X86_SGX
+# define DISABLE_SGX   0
+#else
+# define DISABLE_SGX   (1 << (X86_FEATURE_SGX & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -74,7 +80,7 @@
 #define DISABLED_MASK6 0
 #define DISABLED_MASK7 (DISABLE_PTI)
 #define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_SMAP)
+#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
 #define DISABLED_MASK100
 #define DISABLED_MASK110
 #define DISABLED_MASK120
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 972a34d..258d555 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -609,6 +609,7 @@
 #define FEAT_CTL_LOCKED                        BIT(0)
 #define FEAT_CTL_VMX_ENABLED_INSIDE_SMX        BIT(1)
 #define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX   BIT(2)
+#define FEAT_CTL_SGX_ENABLED   BIT(18)
 #define FEAT_CTL_LMCE_ENABLED  BIT(20)
 
 #define MSR_IA32_TSC_ADJUST 0x003b


[tip: x86/sgx] x86/sgx: Initialize metadata for Enclave Page Cache (EPC) sections

2020-11-18 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: e7e0545299d8cb0fd6fe3ba50401b7f5c3937362
Gitweb:
https://git.kernel.org/tip/e7e0545299d8cb0fd6fe3ba50401b7f5c3937362
Author:Sean Christopherson 
AuthorDate:Fri, 13 Nov 2020 00:01:16 +02:00
Committer: Borislav Petkov 
CommitterDate: Tue, 17 Nov 2020 14:36:13 +01:00

x86/sgx: Initialize metadata for Enclave Page Cache (EPC) sections

Although carved out of normal DRAM, enclave memory is marked in the
system memory map as reserved and is not managed by the core mm.  There
may be several regions spread across the system.  Each contiguous region
is called an Enclave Page Cache (EPC) section.  EPC sections are
enumerated via CPUID.

Enclave pages can only be accessed when they are mapped as part of an
enclave, by a hardware thread running inside the enclave.

Parse CPUID data, create metadata for EPC pages and populate a simple
EPC page allocator.  Although much smaller, ‘struct sgx_epc_page’
metadata is the SGX analog of the core mm ‘struct page’.

Similar to how the core mm’s page->flags encode zone and NUMA
information, embed the EPC section index into the first eight bits of
sgx_epc_page->desc.  This allows a quick reverse lookup from EPC page to
EPC section.  Existing client hardware supports only a single section,
while upcoming server hardware will support at most eight sections.
Thus, eight bits should be enough for long term needs.
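
The resulting accessor is nothing more than a mask and an array index
(roughly what the sgx.h side of this patch does):

  #define SGX_EPC_SECTION_MASK   GENMASK(7, 0)

  /* Reverse lookup: EPC page -> owning EPC section, via desc bits 0-7. */
  static inline struct sgx_epc_section *sgx_get_epc_section(struct sgx_epc_page *page)
  {
      return &sgx_epc_sections[page->desc & SGX_EPC_SECTION_MASK];
  }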

Signed-off-by: Sean Christopherson 
Co-developed-by: Serge Ayoun 
Signed-off-by: Serge Ayoun 
Co-developed-by: Jarkko Sakkinen 
Signed-off-by: Jarkko Sakkinen 
Signed-off-by: Borislav Petkov 
Acked-by: Jethro Beekman 
Link: https://lkml.kernel.org/r/20201112220135.165028-6-jar...@kernel.org
---
 arch/x86/Kconfig |  17 +++-
 arch/x86/kernel/cpu/Makefile |   1 +-
 arch/x86/kernel/cpu/sgx/Makefile |   2 +-
 arch/x86/kernel/cpu/sgx/main.c   | 190 ++-
 arch/x86/kernel/cpu/sgx/sgx.h|  60 +-
 5 files changed, 270 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/sgx/Makefile
 create mode 100644 arch/x86/kernel/cpu/sgx/main.c
 create mode 100644 arch/x86/kernel/cpu/sgx/sgx.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f6946b8..618d1aa 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1930,6 +1930,23 @@ config X86_INTEL_TSX_MODE_AUTO
  side channel attacks- equals the tsx=auto command line parameter.
 endchoice
 
+config X86_SGX
+   bool "Software Guard eXtensions (SGX)"
+   depends on X86_64 && CPU_SUP_INTEL
+   depends on CRYPTO=y
+   depends on CRYPTO_SHA256=y
+   select SRCU
+   select MMU_NOTIFIER
+   help
+ Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions
+ that can be used by applications to set aside private regions of code
+ and data, referred to as enclaves. An enclave's private memory can
+ only be accessed by code running within the enclave. Accesses from
+ outside the enclave, including other enclaves, are disallowed by
+ hardware.
+
+ If unsure, say N.
+
 config EFI
bool "EFI runtime service support"
depends on ACPI
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 93792b4..637b499 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_X86_MCE) += mce/
 obj-$(CONFIG_MTRR) += mtrr/
 obj-$(CONFIG_MICROCODE)+= microcode/
 obj-$(CONFIG_X86_CPU_RESCTRL)  += resctrl/
+obj-$(CONFIG_X86_SGX)  += sgx/
 
 obj-$(CONFIG_X86_LOCAL_APIC)   += perfctr-watchdog.o
 
diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
new file mode 100644
index 000..79510ce
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -0,0 +1,2 @@
+obj-y += \
+   main.o
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
new file mode 100644
index 000..187a237
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "encls.h"
+
+struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
+static int sgx_nr_epc_sections;
+static struct task_struct *ksgxd_tsk;
+
+/*
+ * Reset dirty EPC pages to uninitialized state. Laundry can be left with SECS
+ * pages whose child pages blocked EREMOVE.
+ */
+static void sgx_sanitize_section(struct sgx_epc_section *section)
+{
+   struct sgx_epc_page *page;
+   LIST_HEAD(dirty);
+   int ret;
+
+   while (!list_empty(&section->laundry_list)) {
+   if (kthread_should_stop())
+   return;
+
+   spin_lock(&section->lock);
+
+   page = list_first_entry(&section->laundry_list,
+   struct sgx_epc_page, list);

[tip: x86/sgx] x86/fault: Add a helper function to sanitize error code

2020-11-18 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: cd072dab453a9b4a9f7927f9eddca5a156fbd87d
Gitweb:
https://git.kernel.org/tip/cd072dab453a9b4a9f7927f9eddca5a156fbd87d
Author:Sean Christopherson 
AuthorDate:Fri, 13 Nov 2020 00:01:28 +02:00
Committer: Borislav Petkov 
CommitterDate: Wed, 18 Nov 2020 18:02:50 +01:00

x86/fault: Add a helper function to sanitize error code

vDSO exception fixup is a replacement for signals in limited situations.
Signals and vDSO exception fixup need to provide similar information to
userspace, including the hardware error code.

That hardware error code needs to be sanitized.  For instance, if userspace
accesses a kernel address, the error code could indicate to userspace
whether the address had a Present=1 PTE.  That can leak information about
the kernel layout to userspace, which is bad.

The existing signal code does this sanitization, but fairly late in the
signal process.  The vDSO exception code runs before the sanitization
happens.

Move error code sanitization out of the signal code and into a helper.
Call the helper in the signal code.

Signed-off-by: Sean Christopherson 
Signed-off-by: Jarkko Sakkinen 
Signed-off-by: Borislav Petkov 
Acked-by: Jethro Beekman 
Link: https://lkml.kernel.org/r/20201112220135.165028-18-jar...@kernel.org
---
 arch/x86/mm/fault.c | 26 ++
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9339fee..0161d4a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -602,11 +602,9 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,
oops_end(flags, regs, sig);
 }
 
-static void set_signal_archinfo(unsigned long address,
-   unsigned long error_code)
+static void sanitize_error_code(unsigned long address,
+   unsigned long *error_code)
 {
-   struct task_struct *tsk = current;
-
/*
 * To avoid leaking information about the kernel page
 * table layout, pretend that user-mode accesses to
@@ -617,7 +615,13 @@ static void set_signal_archinfo(unsigned long address,
 * information and does not appear to cause any problems.
 */
if (address >= TASK_SIZE_MAX)
-   error_code |= X86_PF_PROT;
+   *error_code |= X86_PF_PROT;
+}
+
+static void set_signal_archinfo(unsigned long address,
+   unsigned long error_code)
+{
+   struct task_struct *tsk = current;
 
tsk->thread.trap_nr = X86_TRAP_PF;
tsk->thread.error_code = error_code | X86_PF_USER;
@@ -658,6 +662,8 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 * faulting through the emulate_vsyscall() logic.
 */
if (current->thread.sig_on_uaccess_err && signal) {
+   sanitize_error_code(address, &error_code);
+
set_signal_archinfo(address, error_code);
 
/* XXX: hwpoison faults will set the wrong code. */
@@ -806,13 +812,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (is_errata100(regs, address))
return;
 
-   /*
-* To avoid leaking information about the kernel page table
-* layout, pretend that user-mode accesses to kernel addresses
-* are always protection faults.
-*/
-   if (address >= TASK_SIZE_MAX)
-   error_code |= X86_PF_PROT;
+   sanitize_error_code(address, &error_code);
 
if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
@@ -931,6 +931,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
if (is_prefetch(regs, error_code, address))
return;
 
+   sanitize_error_code(address, &error_code);
+
set_signal_archinfo(address, error_code);
 
 #ifdef CONFIG_MEMORY_FAILURE


[tip: x86/sgx] x86/traps: Attempt to fixup exceptions in vDSO before signaling

2020-11-18 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/sgx branch of tip:

Commit-ID: 334872a0919890a70cccd00b8e11931020a819be
Gitweb:
https://git.kernel.org/tip/334872a0919890a70cccd00b8e11931020a819be
Author:Sean Christopherson 
AuthorDate:Fri, 13 Nov 2020 00:01:29 +02:00
Committer: Borislav Petkov 
CommitterDate: Wed, 18 Nov 2020 18:02:50 +01:00

x86/traps: Attempt to fixup exceptions in vDSO before signaling

vDSO functions can now leverage an exception fixup mechanism similar to
kernel exception fixup.  For vDSO exception fixup, the initial user is
Intel's Software Guard Extensions (SGX), which will wrap the low-level
transitions to/from the enclave, i.e. EENTER and ERESUME instructions,
in a vDSO function and leverage fixup to intercept exceptions that would
otherwise generate a signal.  This allows the vDSO wrapper to return the
fault information directly to its caller, obviating the need for SGX
applications and libraries to juggle signal handlers.
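
From the caller's point of view the fixup turns a would-be signal into an
error return, along these lines (illustrative only; the wrapper name and
result struct below are hypothetical stand-ins for the SGX vDSO API):

  #include <stdio.h>

  struct enclave_fault {
      int trapnr;
      unsigned long error_code;
      unsigned long fault_addr;
  };

  /* Hypothetical wrapper around the fixup-protected vDSO entry point. */
  extern int vdso_sgx_enter_enclave(struct enclave_fault *fault);

  static int run_enclave(struct enclave_fault *fault)
  {
      int ret = vdso_sgx_enter_enclave(fault);

      if (ret)   /* fault reported as a return value, no SIGSEGV */
          fprintf(stderr, "enclave fault: trap %d at %#lx\n",
                  fault->trapnr, fault->fault_addr);
      return ret;
  }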

Attempt to fixup vDSO exceptions immediately prior to populating and
sending signal information.  Except for the delivery mechanism, an
exception in a vDSO function should be treated like any other exception
in userspace, e.g. any fault that is successfully handled by the kernel
should not be directly visible to userspace.

Although it's debatable whether or not all exceptions are of interest to
enclaves, defer to the vDSO fixup to decide whether to do fixup or
generate a signal.  Future users of vDSO fixup, if there ever are any,
will undoubtedly have different requirements than SGX enclaves, e.g. the
fixup vs. signal logic can be made function specific if/when necessary.

Suggested-by: Andy Lutomirski 
Signed-off-by: Sean Christopherson 
Signed-off-by: Jarkko Sakkinen 
Signed-off-by: Borislav Petkov 
Acked-by: Jethro Beekman 
Link: https://lkml.kernel.org/r/20201112220135.165028-19-jar...@kernel.org
---
 arch/x86/kernel/traps.c | 10 ++
 arch/x86/mm/fault.c |  7 +++
 2 files changed, 17 insertions(+)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index e19df6c..7798d86 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -60,6 +60,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_X86_64
 #include 
@@ -117,6 +118,9 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, 
const char *str,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
+   } else {
+   if (fixup_vdso_exception(regs, trapnr, error_code, 0))
+   return 0;
}
 
/*
@@ -550,6 +554,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
 
+   if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0))
+   return;
+
show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
force_sig(SIGSEGV);
goto exit;
@@ -1048,6 +1055,9 @@ static void math_error(struct pt_regs *regs, int trapnr)
if (!si_code)
goto exit;
 
+   if (fixup_vdso_exception(regs, trapnr, 0, 0))
+   return;
+
force_sig_fault(SIGFPE, si_code,
(void __user *)uprobe_get_trap_addr(regs));
 exit:
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 0161d4a..f1f1b5a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -30,6 +30,7 @@
#include  /* exception stack */
 #include  /* VMALLOC_START, ...   */
 #include   /* kvm_handle_async_pf  */
+#include   /* fixup_vdso_exception()   */
 
 #define CREATE_TRACE_POINTS
 #include 
@@ -814,6 +815,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long 
error_code,
 
sanitize_error_code(address, &error_code);
 
+   if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address))
+   return;
+
if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
 
@@ -933,6 +937,9 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 
sanitize_error_code(address, &error_code);
 
+   if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address))
+   return;
+
set_signal_archinfo(address, error_code);
 
 #ifdef CONFIG_MEMORY_FAILURE


[tip: x86/urgent] x86/entry/64: Do not use RDPID in paranoid entry to accommodate KVM

2020-08-21 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/urgent branch of tip:

Commit-ID: 6a3ea3e68b8a8a26c4aaac03432ed92269c9a14e
Gitweb:
https://git.kernel.org/tip/6a3ea3e68b8a8a26c4aaac03432ed92269c9a14e
Author:Sean Christopherson 
AuthorDate:Fri, 21 Aug 2020 06:52:29 -04:00
Committer: Thomas Gleixner 
CommitterDate: Fri, 21 Aug 2020 16:15:27 +02:00

x86/entry/64: Do not use RDPID in paranoid entry to accommodate KVM

KVM has an optimization to avoid expensive MSR reads/writes on
VMENTER/EXIT. It caches the MSR values and restores them either when
leaving the run loop, on preemption or when going out to user space.

The affected MSRs are not required for kernel context operations. This
changed with the recently introduced mechanism to handle FSGSBASE in the
paranoid entry code which has to retrieve the kernel GSBASE value by
accessing per CPU memory. The mechanism needs to retrieve the CPU number
and uses either LSL or RDPID if the processor supports it.

Unfortunately RDPID uses MSR_TSC_AUX which is in the list of cached and
lazily restored MSRs, which means between the point where the guest value
is written and the point of restore, MSR_TSC_AUX contains a random number.

If an NMI or any other exception which uses the paranoid entry path happens
in such a context, then RDPID returns the random guest MSR_TSC_AUX value.

As a consequence this reads from the wrong memory location to retrieve the
kernel GSBASE value. Kernel GS is used to for all regular this_cpu_*()
operations. If the GSBASE in the exception handler points to the per CPU
memory of a different CPU then this has the obvious consequences of data
corruption and crashes.

As the paranoid entry path is the only place which accesses MSR_TSC_AUX
(via RDPID) and the fallback via LSL is not significantly slower, remove
the RDPID alternative from the entry path and always use LSL.

The alternative would be to write MSR_TSC_AUX on every VMENTER and VMEXIT
which would inflict massive overhead on that code path.

[ tglx: Rewrote changelog ]

Fixes: eaad981291ee3 ("x86/entry/64: Introduce the FIND_PERCPU_BASE macro")
Reported-by: Tom Lendacky 
Debugged-by: Tom Lendacky 
Suggested-by: Andy Lutomirski 
Suggested-by: Peter Zijlstra 
Signed-off-by: Sean Christopherson 
Signed-off-by: Paolo Bonzini 
Signed-off-by: Thomas Gleixner 
Link: https://lore.kernel.org/r/20200821105229.18938-1-pbonz...@redhat.com
---
 arch/x86/entry/calling.h | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 98e4d88..ae9b0d4 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -374,12 +374,14 @@ For 32-bit we have the following conventions - kernel is built with
  * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
  * We normally use %gs for accessing per-CPU data, but we are setting up
  * %gs here and obviously can not use %gs itself to access per-CPU data.
+ *
+ * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and
+ * may not restore the host's value until the CPU returns to userspace.
+ * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
+ * while running KVM's run loop.
  */
 .macro GET_PERCPU_BASE reg:req
-   ALTERNATIVE \
-   "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \
-   "RDPID  \reg", \
-   X86_FEATURE_RDPID
+   LOAD_CPU_AND_NODE_SEG_LIMIT \reg
andq    $VDSO_CPUNODE_MASK, \reg
movq    __per_cpu_offset(, \reg, 8), \reg
 .endm


[tip: x86/urgent] x86/split_lock: Don't write MSR_TEST_CTRL on CPUs that aren't whitelisted

2020-06-30 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/urgent branch of tip:

Commit-ID: 009bce1df0bb5eb970b9eb98d963861f7fe353c7
Gitweb:
https://git.kernel.org/tip/009bce1df0bb5eb970b9eb98d963861f7fe353c7
Author:Sean Christopherson 
AuthorDate:Fri, 05 Jun 2020 12:26:05 -07:00
Committer: Thomas Gleixner 
CommitterDate: Tue, 30 Jun 2020 14:09:31 +02:00

x86/split_lock: Don't write MSR_TEST_CTRL on CPUs that aren't whitelisted

Choo! Choo!  All aboard the Split Lock Express, with direct service to
Wreckage!

Skip split_lock_verify_msr() if the CPU isn't whitelisted as a possible
SLD-enabled CPU model to avoid writing MSR_TEST_CTRL.  MSR_TEST_CTRL
exists, and is writable, on many generations of CPUs.  Writing the MSR,
even with '0', can result in bizarre, undocumented behavior.

This fixes a crash on Haswell when resuming from suspend with a live KVM
guest.  Because APs use the standard SMP boot flow for resume, they will
go through split_lock_init() and the subsequent RDMSR/WRMSR sequence,
which runs even when sld_state==sld_off to ensure SLD is disabled.  On
Haswell (at least, my Haswell), writing MSR_TEST_CTRL with '0' will
succeed and _may_ take the SMT _sibling_ out of VMX root mode.

When KVM has an active guest, KVM performs VMXON as part of CPU onlining
(see kvm_starting_cpu()).  Because SMP boot is serialized, the resulting
flow is effectively:

  on_each_ap_cpu() {
 WRMSR(MSR_TEST_CTRL, 0)
 VMXON
  }

As a result, the WRMSR can disable VMX on a different CPU that has
already done VMXON.  This ultimately results in a #UD on VMPTRLD when
KVM regains control and attempts to run its vCPUs.

The above voodoo was confirmed by reworking KVM's VMXON flow to write
MSR_TEST_CTRL prior to VMXON, and to serialize the sequence as above.
Further verification of the insanity was done by redoing VMXON on all
APs after the initial WRMSR->VMXON sequence.  The additional VMXON,
which should VM-Fail, occasionally succeeded, and also eliminated the
unexpected #UD on VMPTRLD.

The damage done by writing MSR_TEST_CTRL doesn't appear to be limited
to VMX, e.g. after suspend with an active KVM guest, subsequent reboots
almost always hang (even when fudging VMXON), a #UD on a random Jcc was
observed, suspend/resume stability is qualitatively poor, and so on and
so forth.

  kernel BUG at arch/x86/kvm/x86.c:386!
  CPU: 1 PID: 2592 Comm: CPU 6/KVM Tainted: G  D
  Hardware name: ASUS Q87M-E/Q87M-E, BIOS 1102 03/03/2014
  RIP: 0010:kvm_spurious_fault+0xf/0x20
  Call Trace:
   vmx_vcpu_load_vmcs+0x1fb/0x2b0
   vmx_vcpu_load+0x3e/0x160
   kvm_arch_vcpu_load+0x48/0x260
   finish_task_switch+0x140/0x260
   __schedule+0x460/0x720
   _cond_resched+0x2d/0x40
   kvm_arch_vcpu_ioctl_run+0x82e/0x1ca0
   kvm_vcpu_ioctl+0x363/0x5c0
   ksys_ioctl+0x88/0xa0
   __x64_sys_ioctl+0x16/0x20
   do_syscall_64+0x4c/0x170
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: dbaba47085b0c ("x86/split_lock: Rework the initialization flow of split lock detection")
Signed-off-by: Sean Christopherson 
Signed-off-by: Thomas Gleixner 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/r/20200605192605.7439-1-sean.j.christopher...@intel.com

---
 arch/x86/kernel/cpu/intel.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index c25a67a..0ab48f1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -50,6 +50,13 @@ static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
 static u64 msr_test_ctrl_cache __ro_after_init;
 
 /*
+ * With a name like MSR_TEST_CTL it should go without saying, but don't touch
+ * MSR_TEST_CTL unless the CPU is one of the whitelisted models.  Writing it
+ * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
+ */
+static bool cpu_model_supports_sld __ro_after_init;
+
+/*
  * Processors which have self-snooping capability can handle conflicting
  * memory type across CPUs by snooping its own cache. However, there exists
  * CPU models in which having conflicting memory types still leads to
@@ -1071,7 +1078,8 @@ static void sld_update_msr(bool on)
 
 static void split_lock_init(void)
 {
-   split_lock_verify_msr(sld_state != sld_off);
+   if (cpu_model_supports_sld)
+   split_lock_verify_msr(sld_state != sld_off);
 }
 
 static void split_lock_warn(unsigned long ip)
@@ -1177,5 +1185,6 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
return;
}
 
+   cpu_model_supports_sld = true;
split_lock_setup();
 }


[tip: x86/urgent] x86/cpu: Reinitialize IA32_FEAT_CTL MSR on BSP during wakeup

2020-06-15 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/urgent branch of tip:

Commit-ID: 5d5103595e9e53048bb7e70ee2673c897ab38300
Gitweb:
https://git.kernel.org/tip/5d5103595e9e53048bb7e70ee2673c897ab38300
Author:Sean Christopherson 
AuthorDate:Mon, 08 Jun 2020 10:41:34 -07:00
Committer: Borislav Petkov 
CommitterDate: Mon, 15 Jun 2020 14:18:37 +02:00

x86/cpu: Reinitialize IA32_FEAT_CTL MSR on BSP during wakeup

Reinitialize IA32_FEAT_CTL on the BSP during wakeup to handle the case
where firmware doesn't initialize or save/restore across S3.  This fixes
a bug where IA32_FEAT_CTL is left uninitialized and results in VMXON
taking a #GP due to VMX not being fully enabled, i.e. breaks KVM.

Use init_ia32_feat_ctl() to "restore" IA32_FEAT_CTL as it already deals
with the case where the MSR is locked, and because APs already redo
init_ia32_feat_ctl() during suspend by virtue of the SMP boot flow being
used to reinitialize APs upon wakeup.  Do the call in the early wakeup
flow to avoid dependencies in the syscore_ops chain, e.g. simply adding
a resume hook is not guaranteed to work, as KVM does VMXON in its own
resume hook, kvm_resume(), when KVM has active guests.

Fixes: 21bd3467a58e ("KVM: VMX: Drop initialization of IA32_FEAT_CTL MSR")
Reported-by: Brad Campbell 
Signed-off-by: Sean Christopherson 
Signed-off-by: Borislav Petkov 
Reviewed-by: Liam Merwick 
Reviewed-by: Maxim Levitsky 
Tested-by: Brad Campbell 
Cc: sta...@vger.kernel.org # v5.6
Link: https://lkml.kernel.org/r/20200608174134.11157-1-sean.j.christopher...@intel.com
---
 arch/x86/include/asm/cpu.h| 5 +
 arch/x86/kernel/cpu/centaur.c | 1 +
 arch/x86/kernel/cpu/cpu.h | 4 
 arch/x86/kernel/cpu/zhaoxin.c | 1 +
 arch/x86/power/cpu.c  | 6 ++
 5 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index dd17c2d..da78ccb 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -58,4 +58,9 @@ static inline bool handle_guest_split_lock(unsigned long ip)
return false;
 }
 #endif
+#ifdef CONFIG_IA32_FEAT_CTL
+void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
+#else
+static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {}
+#endif
 #endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 4267925..c5cf336 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -3,6 +3,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index fb538fc..9d03369 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -81,8 +81,4 @@ extern void update_srbds_msr(void);
 
 extern u64 x86_read_arch_cap_msr(void);
 
-#ifdef CONFIG_IA32_FEAT_CTL
-void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
-#endif
-
 #endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c
index df1358b..05fa4ef 100644
--- a/arch/x86/kernel/cpu/zhaoxin.c
+++ b/arch/x86/kernel/cpu/zhaoxin.c
@@ -2,6 +2,7 @@
 #include 
 #include 
 
+#include 
 #include 
 
 #include "cpu.h"
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 7c65102..db1378c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -193,6 +193,8 @@ static void fix_processor_context(void)
  */
 static void notrace __restore_processor_state(struct saved_context *ctxt)
 {
+   struct cpuinfo_x86 *c;
+
if (ctxt->misc_enable_saved)
wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable);
/*
@@ -263,6 +265,10 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
mtrr_bp_restore();
perf_restore_debug_store();
msr_restore_context(ctxt);
+
+   c = &cpu_data(smp_processor_id());
+   if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL))
+   init_ia32_feat_ctl(c);
 }
 
 /* Needed by apm.c */


[tip: x86/urgent] x86/apic/x2apic: Fix a NULL pointer deref when handling a dying cpu

2019-10-15 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/urgent branch of tip:

Commit-ID: 7a22e03b0c02988e91003c505b34d752a51de344
Gitweb:
https://git.kernel.org/tip/7a22e03b0c02988e91003c505b34d752a51de344
Author:Sean Christopherson 
AuthorDate:Tue, 01 Oct 2019 13:50:19 -07:00
Committer: Thomas Gleixner 
CommitterDate: Tue, 15 Oct 2019 10:57:09 +02:00

x86/apic/x2apic: Fix a NULL pointer deref when handling a dying cpu

Check that the per-cpu cluster mask pointer has been set prior to
clearing a dying cpu's bit.  The per-cpu pointer is not set until the
target cpu reaches smp_callin() during CPUHP_BRINGUP_CPU, whereas the
teardown function, x2apic_dead_cpu(), is associated with the earlier
CPUHP_X2APIC_PREPARE.  If an error occurs before the cpu is awakened,
e.g. if do_boot_cpu() itself fails, x2apic_dead_cpu() will dereference
the NULL pointer and cause a panic.
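
The asymmetry is visible in how the hotplug state is registered: the
teardown callback pairs with CPUHP_X2APIC_PREPARE, while the pointer it
relies on is only populated once the AP actually boots (simplified from
x2apic_cluster.c):

  /*
   * x2apic_dead_cpu() runs for any cpu that passed the PREPARE stage,
   * even if bringup failed before the cpu ever set up cluster_masks.
   */
  cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
                    x2apic_prepare_cpu, x2apic_dead_cpu);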

  smpboot: do_boot_cpu failed(-22) to wakeup CPU#1
  BUG: kernel NULL pointer dereference, address: 0008
  RIP: 0010:x2apic_dead_cpu+0x1a/0x30
  Call Trace:
   cpuhp_invoke_callback+0x9a/0x580
   _cpu_up+0x10d/0x140
   do_cpu_up+0x69/0xb0
   smp_init+0x63/0xa9
   kernel_init_freeable+0xd7/0x229
   ? rest_init+0xa0/0xa0
   kernel_init+0xa/0x100
   ret_from_fork+0x35/0x40

Fixes: 023a611748fd5 ("x86/apic/x2apic: Simplify cluster management")
Signed-off-by: Sean Christopherson 
Signed-off-by: Thomas Gleixner 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/r/20191001205019.5789-1-sean.j.christopher...@intel.com

---
 arch/x86/kernel/apic/x2apic_cluster.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 45e92cb..b0889c4 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -156,7 +156,8 @@ static int x2apic_dead_cpu(unsigned int dead_cpu)
 {
struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
 
-   cpumask_clear_cpu(dead_cpu, &cmsk->mask);
+   if (cmsk)
+   cpumask_clear_cpu(dead_cpu, &cmsk->mask);
free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
return 0;
 }


[tip: x86/urgent] x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386

2019-08-23 Thread tip-bot2 for Sean Christopherson
The following commit has been merged into the x86/urgent branch of tip:

Commit-ID: b63f20a778c88b6a04458ed6ffc69da953d3a109
Gitweb:
https://git.kernel.org/tip/b63f20a778c88b6a04458ed6ffc69da953d3a109
Author:Sean Christopherson 
AuthorDate:Thu, 22 Aug 2019 14:11:22 -07:00
Committer: Thomas Gleixner 
CommitterDate: Fri, 23 Aug 2019 17:38:13 +02:00

x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386

Use 'lea' instead of 'add' when adjusting %rsp in CALL_NOSPEC so as to
avoid clobbering flags.

KVM's emulator makes indirect calls into a jump table of sorts, where
the destination of the CALL_NOSPEC is a small blob of code that performs
fast emulation by executing the target instruction with fixed operands.

  adcb_al_dl:
 0x000339f8 <+0>:   adc%dl,%al
 0x000339fa <+2>:   ret

A major motivation for doing fast emulation is to leverage the CPU to
handle consumption and manipulation of arithmetic flags, i.e. RFLAGS is
both an input and output to the target of CALL_NOSPEC.  Clobbering flags
results in all sorts of incorrect emulation, e.g. Jcc instructions often
take the wrong path.  Sans the nops...

  asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
 0x0003595a <+58>:  mov0xc0(%ebx),%eax
 0x00035960 <+64>:  mov0x60(%ebx),%edx
 0x00035963 <+67>:  mov0x90(%ebx),%ecx
 0x00035969 <+73>:  push   %edi
 0x0003596a <+74>:  popf
 0x0003596b <+75>:  call   *%esi
 0x000359a0 <+128>: pushf
 0x000359a1 <+129>: pop%edi
 0x000359a2 <+130>: mov%eax,0xc0(%ebx)
 0x000359b1 <+145>: mov%edx,0x60(%ebx)

  ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
 0x000359a8 <+136>: mov-0x10(%ebp),%eax
 0x000359ab <+139>: and$0x8d5,%edi
 0x000359b4 <+148>: and$0xf72a,%eax
 0x000359b9 <+153>: or %eax,%edi
 0x000359bd <+157>: mov%edi,0x4(%ebx)

For the most part this has gone unnoticed as emulation of guest code
that can trigger fast emulation is effectively limited to MMIO when
running on modern hardware, and MMIO is rarely, if ever, accessed by
instructions that affect or consume flags.

Breakage is almost instantaneous when running with unrestricted guest
disabled, in which case KVM must emulate all instructions when the guest
has invalid state, e.g. when the guest is in Big Real Mode during early
BIOS.
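
The difference is easy to demonstrate outside the kernel: 'add' rewrites
the arithmetic flags, while 'lea' computes the same result without
touching them (a stand-alone x86-64 sketch; the i386 case in the patch
has the same flags behavior):

  #include <stdio.h>

  int main(void)
  {
      unsigned long scratch = 0, after_add, after_lea;

      /* stc sets CF; the add clobbers RFLAGS, so CF is lost. */
      asm volatile("stc; add $4, %0; pushf; pop %1"
                   : "+r" (scratch), "=r" (after_add) : : "cc");

      /* lea does the same arithmetic but never writes RFLAGS. */
      asm volatile("stc; lea 4(%0), %0; pushf; pop %1"
                   : "+r" (scratch), "=r" (after_lea) : : "cc");

      printf("CF after add: %lu, after lea: %lu\n",
             after_add & 1, after_lea & 1);   /* prints 0 vs 1 */
      return 0;
  }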

Fixes: 776b043848fd2 ("x86/retpoline: Add initial retpoline support")
Fixes: 1a29b5b7f347a ("KVM: x86: Make indirect calls in emulator speculation safe")
Signed-off-by: Sean Christopherson 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/r/20190822211122.27579-1-sean.j.christopher...@intel.com

---
 arch/x86/include/asm/nospec-branch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 109f974..80bc209 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -192,7 +192,7 @@
"   lfence;\n"  \
"   jmp902b;\n" \
"   .align 16\n"\
-   "903:   addl   $4, %%esp;\n"\
+   "903:   lea4(%%esp), %%esp;\n"  \
"   pushl  %[thunk_target];\n"  \
"   ret;\n" \
"   .align 16\n"\