[tip: x86/sgx] x86/sgx: Move provisioning device creation out of SGX driver
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: b3754e5d3da320af2bebb7a690002685c7f5c15c Gitweb: https://git.kernel.org/tip/b3754e5d3da320af2bebb7a690002685c7f5c15c Author:Sean Christopherson AuthorDate:Fri, 19 Mar 2021 20:23:09 +13:00 Committer: Borislav Petkov CommitterDate: Tue, 06 Apr 2021 19:18:46 +02:00 x86/sgx: Move provisioning device creation out of SGX driver And extract sgx_set_attribute() out of sgx_ioc_enclave_provision() and export it as symbol for KVM to use. The provisioning key is sensitive. The SGX driver only allows to create an enclave which can access the provisioning key when the enclave creator has permission to open /dev/sgx_provision. It should apply to a VM as well, as the provisioning key is platform-specific, thus an unrestricted VM can also potentially compromise the provisioning key. Move the provisioning device creation out of sgx_drv_init() to sgx_init() as a preparation for adding SGX virtualization support, so that even if the SGX driver is not enabled due to flexible launch control not being available, SGX virtualization can still be enabled, and use it to restrict a VM's capability of being able to access the provisioning key. [ bp: Massage commit message. ] Signed-off-by: Sean Christopherson Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Reviewed-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/0f4d044d621561f26d5f4ef73e8dc6cd18cc7e79.1616136308.git.kai.hu...@intel.com --- arch/x86/include/asm/sgx.h | 3 ++- arch/x86/kernel/cpu/sgx/driver.c | 17 +- arch/x86/kernel/cpu/sgx/ioctl.c | 16 + arch/x86/kernel/cpu/sgx/main.c | 57 ++- 4 files changed, 61 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 954042e..a16e2c9 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -372,4 +372,7 @@ int sgx_virt_einit(void __user *sigstruct, void __user *token, void __user *secs, u64 *lepubkeyhash, int *trapnr); #endif +int sgx_set_attribute(unsigned long *allowed_attributes, + unsigned int attribute_fd); + #endif /* _ASM_X86_SGX_H */ diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c index 8ce6d83..aa9b8b8 100644 --- a/arch/x86/kernel/cpu/sgx/driver.c +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -136,10 +136,6 @@ static const struct file_operations sgx_encl_fops = { .get_unmapped_area = sgx_get_unmapped_area, }; -const struct file_operations sgx_provision_fops = { - .owner = THIS_MODULE, -}; - static struct miscdevice sgx_dev_enclave = { .minor = MISC_DYNAMIC_MINOR, .name = "sgx_enclave", @@ -147,13 +143,6 @@ static struct miscdevice sgx_dev_enclave = { .fops = _encl_fops, }; -static struct miscdevice sgx_dev_provision = { - .minor = MISC_DYNAMIC_MINOR, - .name = "sgx_provision", - .nodename = "sgx_provision", - .fops = _provision_fops, -}; - int __init sgx_drv_init(void) { unsigned int eax, ebx, ecx, edx; @@ -187,11 +176,5 @@ int __init sgx_drv_init(void) if (ret) return ret; - ret = misc_register(_dev_provision); - if (ret) { - misc_deregister(_dev_enclave); - return ret; - } - return 0; } diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 7be9c06..83df20e 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -2,6 +2,7 @@ /* Copyright(c) 2016-20 Intel Corporation. */ #include +#include #include #include #include @@ -666,24 +667,11 @@ out: static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg) { struct sgx_enclave_provision params; - struct file *file; if (copy_from_user(, arg, sizeof(params))) return -EFAULT; - file = fget(params.fd); - if (!file) - return -EINVAL; - - if (file->f_op != _provision_fops) { - fput(file); - return -EINVAL; - } - - encl->attributes_mask |= SGX_ATTR_PROVISIONKEY; - - fput(file); - return 0; + return sgx_set_attribute(>attributes_mask, params.fd); } long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 227f1e2..92cb11d 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -1,14 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2016-20 Intel Corporation. */ +#include #include #include #include +#include #include #include #include #include #include +#include #include "driver.h" #include "encl.h" #include "encls.h" @@ -743,6 +746,51 @@ void sgx_update_lepubkeyhash(u64
[tip: x86/sgx] x86/cpufeatures: Add SGX1 and SGX2 sub-features
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: b8921dccf3b25798409d35155b5d127085de72c2 Gitweb: https://git.kernel.org/tip/b8921dccf3b25798409d35155b5d127085de72c2 Author:Sean Christopherson AuthorDate:Fri, 19 Mar 2021 20:22:18 +13:00 Committer: Borislav Petkov CommitterDate: Thu, 25 Mar 2021 17:33:11 +01:00 x86/cpufeatures: Add SGX1 and SGX2 sub-features Add SGX1 and SGX2 feature flags, via CPUID.0x12.0x0.EAX, as scattered features, since adding a new leaf for only two bits would be wasteful. As part of virtualizing SGX, KVM will expose the SGX CPUID leafs to its guest, and to do so correctly needs to query hardware and kernel support for SGX1 and SGX2. Suppress both SGX1 and SGX2 from /proc/cpuinfo. SGX1 basically means SGX, and for SGX2 there is no concrete use case of using it in /proc/cpuinfo. Signed-off-by: Sean Christopherson Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Acked-by: Jarkko Sakkinen Link: https://lkml.kernel.org/r/d787827dbfca6b3210ac3e432e3ac1202727e786.1616136308.git.kai.hu...@intel.com --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/cpuid-deps.c | 2 ++ arch/x86/kernel/cpu/scattered.c| 2 ++ 3 files changed, 6 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index cc96e26..1f918f5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -290,6 +290,8 @@ #define X86_FEATURE_FENCE_SWAPGS_KERNEL(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ /* Intel-defined CPU features, CPUID level 0x0007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index d40f8e0..defda61 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -73,6 +73,8 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES}, { X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA }, { X86_FEATURE_SGX_LC, X86_FEATURE_SGX }, + { X86_FEATURE_SGX1, X86_FEATURE_SGX }, + { X86_FEATURE_SGX2, X86_FEATURE_SGX1 }, {} }; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 972ec3b..21d1f06 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -36,6 +36,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x0010, 2 }, { X86_FEATURE_MBA, CPUID_EBX, 3, 0x0010, 0 }, { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x0010, 3 }, + { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x0012, 0 }, + { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x0012, 0 }, { X86_FEATURE_HW_PSTATE,CPUID_EDX, 7, 0x8007, 0 }, { X86_FEATURE_CPB, CPUID_EDX, 9, 0x8007, 0 }, { X86_FEATURE_PROC_FEEDBACK,CPUID_EDX, 11, 0x8007, 0 },
[tip: x86/sgx] x86/sgx: Introduce virtual EPC for use by KVM guests
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 540745ddbc70eabdc7dbd3fcc00fe4fb17cd59ba Gitweb: https://git.kernel.org/tip/540745ddbc70eabdc7dbd3fcc00fe4fb17cd59ba Author:Sean Christopherson AuthorDate:Fri, 19 Mar 2021 20:22:21 +13:00 Committer: Borislav Petkov CommitterDate: Tue, 06 Apr 2021 09:43:17 +02:00 x86/sgx: Introduce virtual EPC for use by KVM guests Add a misc device /dev/sgx_vepc to allow userspace to allocate "raw" Enclave Page Cache (EPC) without an associated enclave. The intended and only known use case for raw EPC allocation is to expose EPC to a KVM guest, hence the 'vepc' moniker, virt.{c,h} files and X86_SGX_KVM Kconfig. The SGX driver uses the misc device /dev/sgx_enclave to support userspace in creating an enclave. Each file descriptor returned from opening /dev/sgx_enclave represents an enclave. Unlike the SGX driver, KVM doesn't control how the guest uses the EPC, therefore EPC allocated to a KVM guest is not associated with an enclave, and /dev/sgx_enclave is not suitable for allocating EPC for a KVM guest. Having separate device nodes for the SGX driver and KVM virtual EPC also allows separate permission control for running host SGX enclaves and KVM SGX guests. To use /dev/sgx_vepc to allocate a virtual EPC instance with particular size, the hypervisor opens /dev/sgx_vepc, and uses mmap() with the intended size to get an address range of virtual EPC. Then it may use the address range to create one KVM memory slot as virtual EPC for a guest. Implement the "raw" EPC allocation in the x86 core-SGX subsystem via /dev/sgx_vepc rather than in KVM. Doing so has two major advantages: - Does not require changes to KVM's uAPI, e.g. EPC gets handled as just another memory backend for guests. - EPC management is wholly contained in the SGX subsystem, e.g. SGX does not have to export any symbols, changes to reclaim flows don't need to be routed through KVM, SGX's dirty laundry doesn't have to get aired out for the world to see, and so on and so forth. The virtual EPC pages allocated to guests are currently not reclaimable. Reclaiming an EPC page used by enclave requires a special reclaim mechanism separate from normal page reclaim, and that mechanism is not supported for virutal EPC pages. Due to the complications of handling reclaim conflicts between guest and host, reclaiming virtual EPC pages is significantly more complex than basic support for SGX virtualization. [ bp: - Massage commit message and comments - use cpu_feature_enabled() - vertically align struct members init - massage Virtual EPC clarification text - move Kconfig prompt to Virtualization ] Signed-off-by: Sean Christopherson Co-developed-by: Kai Huang Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Acked-by: Jarkko Sakkinen Link: https://lkml.kernel.org/r/0c38ced8c8e5a69872db4d6a1c0dabd01e07cad7.1616136308.git.kai.hu...@intel.com --- Documentation/x86/sgx.rst| 16 ++- arch/x86/kernel/cpu/sgx/Makefile | 1 +- arch/x86/kernel/cpu/sgx/sgx.h| 9 +- arch/x86/kernel/cpu/sgx/virt.c | 259 ++- arch/x86/kvm/Kconfig | 12 +- 5 files changed, 297 insertions(+) create mode 100644 arch/x86/kernel/cpu/sgx/virt.c diff --git a/Documentation/x86/sgx.rst b/Documentation/x86/sgx.rst index f90076e..dd0ac96 100644 --- a/Documentation/x86/sgx.rst +++ b/Documentation/x86/sgx.rst @@ -234,3 +234,19 @@ As a result, when this happpens, user should stop running any new SGX workloads, (or just any new workloads), and migrate all valuable workloads. Although a machine reboot can recover all EPC memory, the bug should be reported to Linux developers. + + +Virtual EPC +=== + +The implementation has also a virtual EPC driver to support SGX enclaves +in guests. Unlike the SGX driver, an EPC page allocated by the virtual +EPC driver doesn't have a specific enclave associated with it. This is +because KVM doesn't track how a guest uses EPC pages. + +As a result, the SGX core page reclaimer doesn't support reclaiming EPC +pages allocated to KVM guests through the virtual EPC driver. If the +user wants to deploy SGX applications both on the host and in guests +on the same machine, the user should reserve enough EPC (by taking out +total virtual EPC size of all SGX VMs from the physical EPC size) for +host SGX applications so they can run with acceptable performance. diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile index 91d3dc7..9c16567 100644 --- a/arch/x86/kernel/cpu/sgx/Makefile +++ b/arch/x86/kernel/cpu/sgx/Makefile @@ -3,3 +3,4 @@ obj-y += \ encl.o \ ioctl.o \ main.o +obj-$(CONFIG_X86_SGX_KVM) += virt.o diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index 4aa40c6..4854f39 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@
[tip: x86/sgx] x86/sgx: Add SGX_CHILD_PRESENT hardware error code
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 231d3dbdda192e3b3c7b79f4c3b0616f6c7f31b7 Gitweb: https://git.kernel.org/tip/231d3dbdda192e3b3c7b79f4c3b0616f6c7f31b7 Author:Sean Christopherson AuthorDate:Fri, 19 Mar 2021 20:22:20 +13:00 Committer: Borislav Petkov CommitterDate: Fri, 26 Mar 2021 22:51:36 +01:00 x86/sgx: Add SGX_CHILD_PRESENT hardware error code SGX driver can accurately track how enclave pages are used. This enables SECS to be specifically targeted and EREMOVE'd only after all child pages have been EREMOVE'd. This ensures that SGX driver will never encounter SGX_CHILD_PRESENT in normal operation. Virtual EPC is different. The host does not track how EPC pages are used by the guest, so it cannot guarantee EREMOVE success. It might, for instance, encounter a SECS with a non-zero child count. Add a definition of SGX_CHILD_PRESENT. It will be used exclusively by the SGX virtualization driver to handle recoverable EREMOVE errors when saniziting EPC pages after they are freed. Signed-off-by: Sean Christopherson Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Acked-by: Jarkko Sakkinen Link: https://lkml.kernel.org/r/050b198e882afde7e6eba8e6a0d4da39161dbb5a.1616136308.git.kai.hu...@intel.com --- arch/x86/kernel/cpu/sgx/arch.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/sgx/arch.h b/arch/x86/kernel/cpu/sgx/arch.h index dd7602c..abf99bb 100644 --- a/arch/x86/kernel/cpu/sgx/arch.h +++ b/arch/x86/kernel/cpu/sgx/arch.h @@ -26,12 +26,14 @@ * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not * been completed yet. + * %SGX_CHILD_PRESENT SECS has child pages present in the EPC. * %SGX_INVALID_EINITTOKEN:EINITTOKEN is invalid and enclave signer's * public key does not match IA32_SGXLEPUBKEYHASH. * %SGX_UNMASKED_EVENT:An unmasked event, e.g. INTR, was received */ enum sgx_return_code { SGX_NOT_TRACKED = 11, + SGX_CHILD_PRESENT = 13, SGX_INVALID_EINITTOKEN = 16, SGX_UNMASKED_EVENT = 128, };
[tip: x86/sgx] x86/cpu/intel: Allow SGX virtualization without Launch Control support
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 332bfc7becf479de8a55864cc5ed0024baea28aa Gitweb: https://git.kernel.org/tip/332bfc7becf479de8a55864cc5ed0024baea28aa Author:Sean Christopherson AuthorDate:Fri, 19 Mar 2021 20:22:58 +13:00 Committer: Borislav Petkov CommitterDate: Tue, 06 Apr 2021 09:43:41 +02:00 x86/cpu/intel: Allow SGX virtualization without Launch Control support The kernel will currently disable all SGX support if the hardware does not support launch control. Make it more permissive to allow SGX virtualization on systems without Launch Control support. This will allow KVM to expose SGX to guests that have less-strict requirements on the availability of flexible launch control. Improve error message to distinguish between three cases. There are two cases where SGX support is completely disabled: 1) SGX has been disabled completely by the BIOS 2) SGX LC is locked by the BIOS. Bare-metal support is disabled because of LC unavailability. SGX virtualization is unavailable (because of Kconfig). One where it is partially available: 3) SGX LC is locked by the BIOS. Bare-metal support is disabled because of LC unavailability. SGX virtualization is supported. Signed-off-by: Sean Christopherson Co-developed-by: Kai Huang Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Acked-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/b3329777076509b3b601550da288c8f3c406a865.1616136308.git.kai.hu...@intel.com --- arch/x86/kernel/cpu/feat_ctl.c | 59 - 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index 27533a6..da696eb 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -104,8 +104,9 @@ early_param("nosgx", nosgx); void init_ia32_feat_ctl(struct cpuinfo_x86 *c) { + bool enable_sgx_kvm = false, enable_sgx_driver = false; bool tboot = tboot_enabled(); - bool enable_sgx; + bool enable_vmx; u64 msr; if (rdmsrl_safe(MSR_IA32_FEAT_CTL, )) { @@ -114,13 +115,19 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c) return; } - /* -* Enable SGX if and only if the kernel supports SGX and Launch Control -* is supported, i.e. disable SGX if the LE hash MSRs can't be written. -*/ - enable_sgx = cpu_has(c, X86_FEATURE_SGX) && -cpu_has(c, X86_FEATURE_SGX_LC) && -IS_ENABLED(CONFIG_X86_SGX); + enable_vmx = cpu_has(c, X86_FEATURE_VMX) && +IS_ENABLED(CONFIG_KVM_INTEL); + + if (cpu_has(c, X86_FEATURE_SGX) && IS_ENABLED(CONFIG_X86_SGX)) { + /* +* Separate out SGX driver enabling from KVM. This allows KVM +* guests to use SGX even if the kernel SGX driver refuses to +* use it. This happens if flexible Launch Control is not +* available. +*/ + enable_sgx_driver = cpu_has(c, X86_FEATURE_SGX_LC); + enable_sgx_kvm = enable_vmx && IS_ENABLED(CONFIG_X86_SGX_KVM); + } if (msr & FEAT_CTL_LOCKED) goto update_caps; @@ -136,15 +143,18 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c) * i.e. KVM is enabled, to avoid unnecessarily adding an attack vector * for the kernel, e.g. using VMX to hide malicious code. */ - if (cpu_has(c, X86_FEATURE_VMX) && IS_ENABLED(CONFIG_KVM_INTEL)) { + if (enable_vmx) { msr |= FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; if (tboot) msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX; } - if (enable_sgx) - msr |= FEAT_CTL_SGX_ENABLED | FEAT_CTL_SGX_LC_ENABLED; + if (enable_sgx_kvm || enable_sgx_driver) { + msr |= FEAT_CTL_SGX_ENABLED; + if (enable_sgx_driver) + msr |= FEAT_CTL_SGX_LC_ENABLED; + } wrmsrl(MSR_IA32_FEAT_CTL, msr); @@ -167,10 +177,29 @@ update_caps: } update_sgx: - if (!(msr & FEAT_CTL_SGX_ENABLED) || - !(msr & FEAT_CTL_SGX_LC_ENABLED) || !enable_sgx) { - if (enable_sgx) - pr_err_once("SGX disabled by BIOS\n"); + if (!(msr & FEAT_CTL_SGX_ENABLED)) { + if (enable_sgx_kvm || enable_sgx_driver) + pr_err_once("SGX disabled by BIOS.\n"); clear_cpu_cap(c, X86_FEATURE_SGX); + return; + } + + /* +* VMX feature bit may be cleared due to being disabled in BIOS, +* in which case SGX virtualization cannot be supported either. +*/ + if (!cpu_has(c, X86_FEATURE_VMX) && enable_sgx_kvm) { + pr_err_once("SGX virtualization disabled due to lack of VMX.\n"); +
[tip: x86/sgx] x86/sgx: Expose SGX architectural definitions to the kernel
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 8ca52cc38dc8fdcbdbd0c23eafb19db5e5f5c8d0 Gitweb: https://git.kernel.org/tip/8ca52cc38dc8fdcbdbd0c23eafb19db5e5f5c8d0 Author:Sean Christopherson AuthorDate:Fri, 19 Mar 2021 20:23:03 +13:00 Committer: Borislav Petkov CommitterDate: Tue, 06 Apr 2021 09:43:41 +02:00 x86/sgx: Expose SGX architectural definitions to the kernel Expose SGX architectural structures, as KVM will use many of the architectural constants and structs to virtualize SGX. Name the new header file as asm/sgx.h, rather than asm/sgx_arch.h, to have single header to provide SGX facilities to share with other kernel componments. Also update MAINTAINERS to include asm/sgx.h. Signed-off-by: Sean Christopherson Co-developed-by: Kai Huang Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Acked-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/6bf47acd91ab4d709e66ad1692c7803e4c9063a0.1616136308.git.kai.hu...@intel.com --- MAINTAINERS | 1 +- arch/x86/include/asm/sgx.h| 350 +- arch/x86/kernel/cpu/sgx/arch.h| 340 + arch/x86/kernel/cpu/sgx/encl.c| 2 +- arch/x86/kernel/cpu/sgx/sgx.h | 2 +- tools/testing/selftests/sgx/defines.h | 2 +- 6 files changed, 354 insertions(+), 343 deletions(-) create mode 100644 arch/x86/include/asm/sgx.h delete mode 100644 arch/x86/kernel/cpu/sgx/arch.h diff --git a/MAINTAINERS b/MAINTAINERS index aa84121..0cb606a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9274,6 +9274,7 @@ Q: https://patchwork.kernel.org/project/intel-sgx/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/sgx F: Documentation/x86/sgx.rst F: arch/x86/entry/vdso/vsgx.S +F: arch/x86/include/asm/sgx.h F: arch/x86/include/uapi/asm/sgx.h F: arch/x86/kernel/cpu/sgx/* F: tools/testing/selftests/sgx/* diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h new file mode 100644 index 000..14bb5f7 --- /dev/null +++ b/arch/x86/include/asm/sgx.h @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/** + * Copyright(c) 2016-20 Intel Corporation. + * + * Intel Software Guard Extensions (SGX) support. + */ +#ifndef _ASM_X86_SGX_H +#define _ASM_X86_SGX_H + +#include +#include + +/* + * This file contains both data structures defined by SGX architecture and Linux + * defined software data structures and functions. The two should not be mixed + * together for better readibility. The architectural definitions come first. + */ + +/* The SGX specific CPUID function. */ +#define SGX_CPUID 0x12 +/* EPC enumeration. */ +#define SGX_CPUID_EPC 2 +/* An invalid EPC section, i.e. the end marker. */ +#define SGX_CPUID_EPC_INVALID 0x0 +/* A valid EPC section. */ +#define SGX_CPUID_EPC_SECTION 0x1 +/* The bitmask for the EPC section type. */ +#define SGX_CPUID_EPC_MASK GENMASK(3, 0) + +/** + * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV + * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not + * been completed yet. + * %SGX_CHILD_PRESENT SECS has child pages present in the EPC. + * %SGX_INVALID_EINITTOKEN:EINITTOKEN is invalid and enclave signer's + * public key does not match IA32_SGXLEPUBKEYHASH. + * %SGX_UNMASKED_EVENT:An unmasked event, e.g. INTR, was received + */ +enum sgx_return_code { + SGX_NOT_TRACKED = 11, + SGX_CHILD_PRESENT = 13, + SGX_INVALID_EINITTOKEN = 16, + SGX_UNMASKED_EVENT = 128, +}; + +/* The modulus size for 3072-bit RSA keys. */ +#define SGX_MODULUS_SIZE 384 + +/** + * enum sgx_miscselect - additional information to an SSA frame + * %SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame. + * + * Save State Area (SSA) is a stack inside the enclave used to store processor + * state when an exception or interrupt occurs. This enum defines additional + * information stored to an SSA frame. + */ +enum sgx_miscselect { + SGX_MISC_EXINFO = BIT(0), +}; + +#define SGX_MISC_RESERVED_MASK GENMASK_ULL(63, 1) + +#define SGX_SSA_GPRS_SIZE 184 +#define SGX_SSA_MISC_EXINFO_SIZE 16 + +/** + * enum sgx_attributes - the attributes field in sgx_secs + * %SGX_ATTR_INIT: Enclave can be entered (is initialized). + * %SGX_ATTR_DEBUG:Allow ENCLS(EDBGRD) and ENCLS(EDBGWR). + * %SGX_ATTR_MODE64BIT:Tell that this a 64-bit enclave. + * %SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote + * attestation. + * %SGX_ATTR_KSS: Allow to use key separation and sharing (KSS). + * %SGX_ATTR_EINITTOKENKEY:Allow to use token signing key that is used to + *
[tip: x86/sgx] x86/sgx: Add SGX2 ENCLS leaf definitions (EAUG, EMODPR and EMODT)
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 32ddda8e445df3de477db14d386fb3518042224a Gitweb: https://git.kernel.org/tip/32ddda8e445df3de477db14d386fb3518042224a Author:Sean Christopherson AuthorDate:Fri, 19 Mar 2021 20:23:05 +13:00 Committer: Borislav Petkov CommitterDate: Tue, 06 Apr 2021 09:43:42 +02:00 x86/sgx: Add SGX2 ENCLS leaf definitions (EAUG, EMODPR and EMODT) Define the ENCLS leafs that are available with SGX2, also referred to as Enclave Dynamic Memory Management (EDMM). The leafs will be used by KVM to conditionally expose SGX2 capabilities to guests. Signed-off-by: Sean Christopherson Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Acked-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/5f0970c251ebcc6d5add132f0d750cc753b7060f.1616136308.git.kai.hu...@intel.com --- arch/x86/include/asm/sgx.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 34f4423..3b025af 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -40,6 +40,9 @@ enum sgx_encls_function { EPA = 0x0A, EWB = 0x0B, ETRACK = 0x0C, + EAUG= 0x0D, + EMODPR = 0x0E, + EMODT = 0x0F, }; /**
[tip: x86/sgx] x86/sgx: Add encls_faulted() helper
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: a67136b458e5e63822b19c35794451122fe2bf3e Gitweb: https://git.kernel.org/tip/a67136b458e5e63822b19c35794451122fe2bf3e Author:Sean Christopherson AuthorDate:Fri, 19 Mar 2021 20:23:06 +13:00 Committer: Borislav Petkov CommitterDate: Tue, 06 Apr 2021 09:43:42 +02:00 x86/sgx: Add encls_faulted() helper Add a helper to extract the fault indicator from an encoded ENCLS return value. SGX virtualization will also need to detect ENCLS faults. Signed-off-by: Sean Christopherson Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Acked-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/c1f955898110de2f669da536fc6cf62e003dff88.1616136308.git.kai.hu...@intel.com --- arch/x86/kernel/cpu/sgx/encls.h | 15 ++- arch/x86/kernel/cpu/sgx/ioctl.c | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h index be5c496..9b20484 100644 --- a/arch/x86/kernel/cpu/sgx/encls.h +++ b/arch/x86/kernel/cpu/sgx/encls.h @@ -40,6 +40,19 @@ } while (0); \ } +/* + * encls_faulted() - Check if an ENCLS leaf faulted given an error code + * @ret: the return value of an ENCLS leaf function call + * + * Return: + * - true: ENCLS leaf faulted. + * - false:Otherwise. + */ +static inline bool encls_faulted(int ret) +{ + return ret & ENCLS_FAULT_FLAG; +} + /** * encls_failed() - Check if an ENCLS function failed * @ret: the return value of an ENCLS function call @@ -50,7 +63,7 @@ */ static inline bool encls_failed(int ret) { - if (ret & ENCLS_FAULT_FLAG) + if (encls_faulted(ret)) return ENCLS_TRAPNR(ret) != X86_TRAP_PF; return !!ret; diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 354e309..11e3f96 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -568,7 +568,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct, } } - if (ret & ENCLS_FAULT_FLAG) { + if (encls_faulted(ret)) { if (encls_failed(ret)) ENCLS_WARN(ret, "EINIT");
[tip: x86/sgx] x86/sgx: Move ENCLS leaf definitions to sgx.h
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 9c55c78a73ce6e62a1d46ba6e4f242c23c29b812 Gitweb: https://git.kernel.org/tip/9c55c78a73ce6e62a1d46ba6e4f242c23c29b812 Author:Sean Christopherson AuthorDate:Fri, 19 Mar 2021 20:23:04 +13:00 Committer: Borislav Petkov CommitterDate: Tue, 06 Apr 2021 09:43:41 +02:00 x86/sgx: Move ENCLS leaf definitions to sgx.h Move the ENCLS leaf definitions to sgx.h so that they can be used by KVM. Signed-off-by: Sean Christopherson Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Acked-by: Jarkko Sakkinen Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/2e6cd7c5c1ced620cfcd292c3c6c382827fde6b2.1616136308.git.kai.hu...@intel.com --- arch/x86/include/asm/sgx.h | 15 +++ arch/x86/kernel/cpu/sgx/encls.h | 15 --- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 14bb5f7..34f4423 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -27,6 +27,21 @@ /* The bitmask for the EPC section type. */ #define SGX_CPUID_EPC_MASK GENMASK(3, 0) +enum sgx_encls_function { + ECREATE = 0x00, + EADD= 0x01, + EINIT = 0x02, + EREMOVE = 0x03, + EDGBRD = 0x04, + EDGBWR = 0x05, + EEXTEND = 0x06, + ELDU= 0x08, + EBLOCK = 0x09, + EPA = 0x0A, + EWB = 0x0B, + ETRACK = 0x0C, +}; + /** * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h index 443188f..be5c496 100644 --- a/arch/x86/kernel/cpu/sgx/encls.h +++ b/arch/x86/kernel/cpu/sgx/encls.h @@ -11,21 +11,6 @@ #include #include "sgx.h" -enum sgx_encls_function { - ECREATE = 0x00, - EADD= 0x01, - EINIT = 0x02, - EREMOVE = 0x03, - EDGBRD = 0x04, - EDGBWR = 0x05, - EEXTEND = 0x06, - ELDU= 0x08, - EBLOCK = 0x09, - EPA = 0x0A, - EWB = 0x0B, - ETRACK = 0x0C, -}; - /** * ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr *
[tip: x86/sgx] x86/sgx: Add helpers to expose ECREATE and EINIT to KVM
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: d155030b1e7c0e448aab22a803f7a71ea2e117d7 Gitweb: https://git.kernel.org/tip/d155030b1e7c0e448aab22a803f7a71ea2e117d7 Author:Sean Christopherson AuthorDate:Fri, 19 Mar 2021 20:23:08 +13:00 Committer: Borislav Petkov CommitterDate: Tue, 06 Apr 2021 19:18:27 +02:00 x86/sgx: Add helpers to expose ECREATE and EINIT to KVM The host kernel must intercept ECREATE to impose policies on guests, and intercept EINIT to be able to write guest's virtual SGX_LEPUBKEYHASH MSR values to hardware before running guest's EINIT so it can run correctly according to hardware behavior. Provide wrappers around __ecreate() and __einit() to hide the ugliness of overloading the ENCLS return value to encode multiple error formats in a single int. KVM will trap-and-execute ECREATE and EINIT as part of SGX virtualization, and reflect ENCLS execution result to guest by setting up guest's GPRs, or on an exception, injecting the correct fault based on return value of __ecreate() and __einit(). Use host userspace addresses (provided by KVM based on guest physical address of ENCLS parameters) to execute ENCLS/EINIT when possible. Accesses to both EPC and memory originating from ENCLS are subject to segmentation and paging mechanisms. It's also possible to generate kernel mappings for ENCLS parameters by resolving PFN but using __uaccess_xx() is simpler. [ bp: Return early if the __user memory accesses fail, use cpu_feature_enabled(). ] Signed-off-by: Sean Christopherson Signed-off-by: Kai Huang Signed-off-by: Borislav Petkov Acked-by: Jarkko Sakkinen Link: https://lkml.kernel.org/r/20e09daf559aa5e9e680a0b4b5fba940f1bad86e.1616136308.git.kai.hu...@intel.com --- arch/x86/include/asm/sgx.h | 7 ++- arch/x86/kernel/cpu/sgx/virt.c | 117 - 2 files changed, 124 insertions(+) diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 3b025af..954042e 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -365,4 +365,11 @@ struct sgx_sigstruct { * comment! */ +#ifdef CONFIG_X86_SGX_KVM +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, +int *trapnr); +int sgx_virt_einit(void __user *sigstruct, void __user *token, + void __user *secs, u64 *lepubkeyhash, int *trapnr); +#endif + #endif /* _ASM_X86_SGX_H */ diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c index 259cc46..7d221ea 100644 --- a/arch/x86/kernel/cpu/sgx/virt.c +++ b/arch/x86/kernel/cpu/sgx/virt.c @@ -257,3 +257,120 @@ int __init sgx_vepc_init(void) return misc_register(_vepc_dev); } + +/** + * sgx_virt_ecreate() - Run ECREATE on behalf of guest + * @pageinfo: Pointer to PAGEINFO structure + * @secs: Userspace pointer to SECS page + * @trapnr:trap number injected to guest in case of ECREATE error + * + * Run ECREATE on behalf of guest after KVM traps ECREATE for the purpose + * of enforcing policies of guest's enclaves, and return the trap number + * which should be injected to guest in case of any ECREATE error. + * + * Return: + * - 0: ECREATE was successful. + * - <0: on error. + */ +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, +int *trapnr) +{ + int ret; + + /* +* @secs is an untrusted, userspace-provided address. It comes from +* KVM and is assumed to be a valid pointer which points somewhere in +* userspace. This can fault and call SGX or other fault handlers when +* userspace mapping @secs doesn't exist. +* +* Add a WARN() to make sure @secs is already valid userspace pointer +* from caller (KVM), who should already have handled invalid pointer +* case (for instance, made by malicious guest). All other checks, +* such as alignment of @secs, are deferred to ENCLS itself. +*/ + if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE))) + return -EINVAL; + + __uaccess_begin(); + ret = __ecreate(pageinfo, (void *)secs); + __uaccess_end(); + + if (encls_faulted(ret)) { + *trapnr = ENCLS_TRAPNR(ret); + return -EFAULT; + } + + /* ECREATE doesn't return an error code, it faults or succeeds. */ + WARN_ON_ONCE(ret); + return 0; +} +EXPORT_SYMBOL_GPL(sgx_virt_ecreate); + +static int __sgx_virt_einit(void __user *sigstruct, void __user *token, + void __user *secs) +{ + int ret; + + /* +* Make sure all userspace pointers from caller (KVM) are valid. +* All other checks deferred to ENCLS itself. Also see comment +* for @secs in sgx_virt_ecreate(). +*/ +#define SGX_EINITTOKEN_SIZE304 + if (WARN_ON_ONCE(!access_ok(sigstruct, sizeof(struct sgx_sigstruct)) || +
[tip: perf/urgent] x86/perf: Use RET0 as default for guest_get_msrs to handle "no PMU" case
The following commit has been merged into the perf/urgent branch of tip: Commit-ID: c8e2fe13d1d1f3a02842b7b909d4e4846a4b6a2c Gitweb: https://git.kernel.org/tip/c8e2fe13d1d1f3a02842b7b909d4e4846a4b6a2c Author:Sean Christopherson AuthorDate:Tue, 09 Mar 2021 09:10:19 -08:00 Committer: Peter Zijlstra CommitterDate: Wed, 10 Mar 2021 16:45:09 +01:00 x86/perf: Use RET0 as default for guest_get_msrs to handle "no PMU" case Initialize x86_pmu.guest_get_msrs to return 0/NULL to handle the "nop" case. Patching in perf_guest_get_msrs_nop() during setup does not work if there is no PMU, as setup bails before updating the static calls, leaving x86_pmu.guest_get_msrs NULL and thus a complete nop. Ultimately, this causes VMX abort on VM-Exit due to KVM putting random garbage from the stack into the MSR load list. Add a comment in KVM to note that nr_msrs is valid if and only if the return value is non-NULL. Fixes: abd562df94d1 ("x86/perf: Use static_call for x86_pmu.guest_get_msrs") Reported-by: Dmitry Vyukov Reported-by: syzbot+cce9ef2dd25246f81...@syzkaller.appspotmail.com Suggested-by: Peter Zijlstra Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210309171019.1125243-1-sea...@google.com --- arch/x86/events/core.c | 15 ++- arch/x86/kvm/vmx/vmx.c | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6ddeed3..18df171 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -81,7 +81,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); -DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs); +/* + * This one is magic, it will get called even when PMU init fails (because + * there is no PMU), in which case it should simply return NULL. + */ +DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs); u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -1944,13 +1948,6 @@ static void _x86_pmu_read(struct perf_event *event) x86_perf_event_update(event); } -static inline struct perf_guest_switch_msr * -perf_guest_get_msrs_nop(int *nr) -{ - *nr = 0; - return NULL; -} - static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -2025,7 +2022,7 @@ static int __init init_hw_perf_events(void) x86_pmu.read = _x86_pmu_read; if (!x86_pmu.guest_get_msrs) - x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop; + x86_pmu.guest_get_msrs = (void *)&__static_call_return0; x86_pmu_static_call_update(); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 50810d4..32cf828 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6580,8 +6580,8 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) int i, nr_msrs; struct perf_guest_switch_msr *msrs; + /* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */ msrs = perf_guest_get_msrs(_msrs); - if (!msrs) return;
[tip: x86/cpu] x86/cpufeatures: Assign dedicated feature word for CPUID_0x8000001F[EAX]
The following commit has been merged into the x86/cpu branch of tip: Commit-ID: fb35d30fe5b06cc2f0405da8fbe0be5330d1 Gitweb: https://git.kernel.org/tip/fb35d30fe5b06cc2f0405da8fbe0be5330d1 Author:Sean Christopherson AuthorDate:Fri, 22 Jan 2021 12:40:46 -08:00 Committer: Borislav Petkov CommitterDate: Thu, 28 Jan 2021 17:41:24 +01:00 x86/cpufeatures: Assign dedicated feature word for CPUID_0x801F[EAX] Collect the scattered SME/SEV related feature flags into a dedicated word. There are now five recognized features in CPUID.0x801F.EAX, with at least one more on the horizon (SEV-SNP). Using a dedicated word allows KVM to use its automagic CPUID adjustment logic when reporting the set of supported features to userspace. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov Reviewed-by: Brijesh Singh Link: https://lkml.kernel.org/r/20210122204047.2860075-2-sea...@google.com --- arch/x86/include/asm/cpufeature.h | 7 +-- arch/x86/include/asm/cpufeatures.h | 17 +++-- arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- arch/x86/kernel/cpu/common.c | 3 +++- arch/x86/kernel/cpu/scattered.c| 5 +- tools/arch/x86/include/asm/disabled-features.h | 3 ++- tools/arch/x86/include/asm/required-features.h | 3 ++- 8 files changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 59bf91c..1728d4c 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -30,6 +30,7 @@ enum cpuid_leafs CPUID_7_ECX, CPUID_8000_0007_EBX, CPUID_7_EDX, + CPUID_8000_001F_EAX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -88,8 +89,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) ||\ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) ||\ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) ||\ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) ||\ REQUIRED_MASK_CHECK||\ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + BUILD_BUG_ON_ZERO(NCAPINTS != 20)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) ||\ @@ -111,8 +113,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) ||\ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) ||\ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) ||\ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) ||\ DISABLED_MASK_CHECK||\ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + BUILD_BUG_ON_ZERO(NCAPINTS != 20)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 84b8878..1feb6c0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ +#define NCAPINTS 20 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ +/* FREE!( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME( 7*32+10) /* AMD Secure Memory Encryption */ +/* FREE!( 7*32+10) */ #define X86_FEATURE_PTI( 7*32+11) /* Kernel Page Table Isolation enabled */
[tip: x86/cleanups] x86/asm: Drop unused RDPID macro
The following commit has been merged into the x86/cleanups branch of tip: Commit-ID: 8539d3f06710a9e91b9968fa736549d7c6b44206 Gitweb: https://git.kernel.org/tip/8539d3f06710a9e91b9968fa736549d7c6b44206 Author:Sean Christopherson AuthorDate:Tue, 27 Oct 2020 14:45:32 -07:00 Committer: Borislav Petkov CommitterDate: Thu, 26 Nov 2020 12:58:56 +01:00 x86/asm: Drop unused RDPID macro Drop the GAS-compatible RDPID macro. RDPID is unsafe in the kernel because KVM loads guest's TSC_AUX on VM-entry and may not restore the host's value until the CPU returns to userspace. See 6a3ea3e68b8a ("x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM") for details. It can always be resurrected from git history, if needed. [ bp: Massage commit message. ] Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201027214532.1792-1-sean.j.christopher...@intel.com --- arch/x86/include/asm/inst.h | 15 --- 1 file changed, 15 deletions(-) diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index bd7f024..438ccd4 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h @@ -143,21 +143,6 @@ .macro MODRM mod opd1 opd2 .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) .endm - -.macro RDPID opd - REG_TYPE rdpid_opd_type \opd - .if rdpid_opd_type == REG_TYPE_R64 - R64_NUM rdpid_opd \opd - .else - R32_NUM rdpid_opd \opd - .endif - .byte 0xf3 - .if rdpid_opd > 7 - PFX_REX rdpid_opd 0 - .endif - .byte 0x0f, 0xc7 - MODRM 0xc0 rdpid_opd 0x7 -.endm #endif #endif
[tip: x86/sgx] x86/vdso: Implement a vDSO for Intel SGX enclave call
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 84664369520170f48546c55cbc1f3fbde9b1e140 Gitweb: https://git.kernel.org/tip/84664369520170f48546c55cbc1f3fbde9b1e140 Author:Sean Christopherson AuthorDate:Fri, 13 Nov 2020 00:01:30 +02:00 Committer: Borislav Petkov CommitterDate: Wed, 18 Nov 2020 18:02:50 +01:00 x86/vdso: Implement a vDSO for Intel SGX enclave call Enclaves encounter exceptions for lots of reasons: everything from enclave page faults to NULL pointer dereferences, to system calls that must be “proxied” to the kernel from outside the enclave. In addition to the code contained inside an enclave, there is also supporting code outside the enclave called an “SGX runtime”, which is virtually always implemented inside a shared library. The runtime helps build the enclave and handles things like *re*building the enclave if it got destroyed by something like a suspend/resume cycle. The rebuilding has traditionally been handled in SIGSEGV handlers, registered by the library. But, being process-wide, shared state, signal handling and shared libraries do not mix well. Introduce a vDSO function call that wraps the enclave entry functions (EENTER/ERESUME functions of the ENCLU instruciton) and returns information about any exceptions to the caller in the SGX runtime. Instead of generating a signal, the kernel places exception information in RDI, RSI and RDX. The kernel-provided userspace portion of the vDSO handler will place this information in a user-provided buffer or trigger a user-provided callback at the time of the exception. The vDSO function calling convention uses the standard RDI RSI, RDX, RCX, R8 and R9 registers. This makes it possible to declare the vDSO as a C prototype, but other than that there is no specific support for SystemV ABI. Things like storing XSAVE are the responsibility of the enclave and the runtime. [ bp: Change vsgx.o build dependency to CONFIG_X86_SGX. ] Suggested-by: Andy Lutomirski Signed-off-by: Sean Christopherson Co-developed-by: Cedric Xing Signed-off-by: Cedric Xing Co-developed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Tested-by: Jethro Beekman Link: https://lkml.kernel.org/r/20201112220135.165028-20-jar...@kernel.org --- arch/x86/entry/vdso/Makefile| 2 +- arch/x86/entry/vdso/vdso.lds.S | 1 +- arch/x86/entry/vdso/vsgx.S | 151 +++- arch/x86/include/asm/enclu.h| 9 ++- arch/x86/include/uapi/asm/sgx.h | 91 +++- 5 files changed, 254 insertions(+) create mode 100644 arch/x86/entry/vdso/vsgx.S create mode 100644 arch/x86/include/asm/enclu.h diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 2ad757f..02e3e42 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -27,6 +27,7 @@ VDSO32-$(CONFIG_IA32_EMULATION) := y vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o vobjs32-y += vdso32/vclock_gettime.o +vobjs-$(CONFIG_X86_SGX)+= vsgx.o # files to link into kernel obj-y += vma.o extable.o @@ -98,6 +99,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS CFLAGS_REMOVE_vclock_gettime.o = -pg CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg CFLAGS_REMOVE_vgetcpu.o = -pg +CFLAGS_REMOVE_vsgx.o = -pg # # X32 processes use x32 vDSO to access 64bit kernel data. diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S index 36b644e..4bf4846 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso.lds.S @@ -27,6 +27,7 @@ VERSION { __vdso_time; clock_getres; __vdso_clock_getres; + __vdso_sgx_enter_enclave; local: *; }; } diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S new file mode 100644 index 000..86a0e94 --- /dev/null +++ b/arch/x86/entry/vdso/vsgx.S @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include + +#include "extable.h" + +/* Relative to %rbp. */ +#define SGX_ENCLAVE_OFFSET_OF_RUN 16 + +/* The offsets relative to struct sgx_enclave_run. */ +#define SGX_ENCLAVE_RUN_TCS0 +#define SGX_ENCLAVE_RUN_LEAF 8 +#define SGX_ENCLAVE_RUN_EXCEPTION_VECTOR 12 +#define SGX_ENCLAVE_RUN_EXCEPTION_ERROR_CODE 14 +#define SGX_ENCLAVE_RUN_EXCEPTION_ADDR 16 +#define SGX_ENCLAVE_RUN_USER_HANDLER 24 +#define SGX_ENCLAVE_RUN_USER_DATA 32 /* not used */ +#define SGX_ENCLAVE_RUN_RESERVED_START 40 +#define SGX_ENCLAVE_RUN_RESERVED_END 256 + +.code64 +.section .text, "ax" + +SYM_FUNC_START(__vdso_sgx_enter_enclave) + /* Prolog */ + .cfi_startproc + push%rbp + .cfi_adjust_cfa_offset 8 +
[tip: x86/sgx] x86/vdso: Add support for exception fixup in vDSO functions
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 8382c668ce4f367d902f4a340a1bfa9e46096ec1 Gitweb: https://git.kernel.org/tip/8382c668ce4f367d902f4a340a1bfa9e46096ec1 Author:Sean Christopherson AuthorDate:Fri, 13 Nov 2020 00:01:27 +02:00 Committer: Borislav Petkov CommitterDate: Wed, 18 Nov 2020 18:02:50 +01:00 x86/vdso: Add support for exception fixup in vDSO functions Signals are a horrid little mechanism. They are especially nasty in multi-threaded environments because signal state like handlers is global across the entire process. But, signals are basically the only way that userspace can “gracefully” handle and recover from exceptions. The kernel generally does not like exceptions to occur during execution. But, exceptions are a fact of life and must be handled in some circumstances. The kernel handles them by keeping a list of individual instructions which may cause exceptions. Instead of truly handling the exception and returning to the instruction that caused it, the kernel instead restarts execution at a *different* instruction. This makes it obvious to that thread of execution that the exception occurred and lets *that* code handle the exception instead of the handler. This is not dissimilar to the try/catch exceptions mechanisms that some programming languages have, but applied *very* surgically to single instructions. It effectively changes the visible architecture of the instruction. Problem === SGX generates a lot of signals, and the code to enter and exit enclaves and muck with signal handling is truly horrid. At the same time, an approach like kernel exception fixup can not be easily applied to userspace instructions because it changes the visible instruction architecture. Solution The vDSO is a special page of kernel-provided instructions that run in userspace. Any userspace calling into the vDSO knows that it is special. This allows the kernel a place to legitimately rewrite the user/kernel contract and change instruction behavior. Add support for fixing up exceptions that occur while executing in the vDSO. This replaces what could traditionally only be done with signal handling. This new mechanism will be used to replace previously direct use of SGX instructions by userspace. Just introduce the vDSO infrastructure. Later patches will actually replace signal generation with vDSO exception fixup. Suggested-by: Andy Lutomirski Signed-off-by: Sean Christopherson Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Link: https://lkml.kernel.org/r/20201112220135.165028-17-jar...@kernel.org --- arch/x86/entry/vdso/Makefile | 6 +-- arch/x86/entry/vdso/extable.c | 46 - arch/x86/entry/vdso/extable.h | 28 +++- arch/x86/entry/vdso/vdso-layout.lds.S | 9 - arch/x86/entry/vdso/vdso2c.h | 50 +- arch/x86/include/asm/vdso.h | 5 +++- 6 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 arch/x86/entry/vdso/extable.c create mode 100644 arch/x86/entry/vdso/extable.h diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 2124374..2ad757f 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -29,7 +29,7 @@ vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o vobjs32-y += vdso32/vclock_gettime.o # files to link into kernel -obj-y += vma.o +obj-y += vma.o extable.o KASAN_SANITIZE_vma.o := y UBSAN_SANITIZE_vma.o := y KCSAN_SANITIZE_vma.o := y @@ -128,8 +128,8 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE targets += vdsox32.lds $(vobjx32s-y) -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE +$(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table +$(obj)/%.so: $(obj)/%.so.dbg $(call if_changed,objcopy) $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE diff --git a/arch/x86/entry/vdso/extable.c b/arch/x86/entry/vdso/extable.c new file mode 100644 index 000..afcf5b6 --- /dev/null +++ b/arch/x86/entry/vdso/extable.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +struct vdso_exception_table_entry { + int insn, fixup; +}; + +bool fixup_vdso_exception(struct pt_regs *regs, int trapnr, + unsigned long error_code, unsigned long fault_addr) +{ + const struct vdso_image *image = current->mm->context.vdso_image; + const struct vdso_exception_table_entry *extable; + unsigned int nr_entries, i; + unsigned long base; + + /* +* Do not attempt to fixup #DB or #BP. It's impossible to identify +* whether or not a #DB/#BP originated from within an SGX enclave and +* SGX enclaves are currently the only use case for vDSO fixup.
[tip: x86/sgx] mm: Add 'mprotect' hook to struct vm_operations_struct
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 95bb7c42ac8a94ce3d0eb059ad64430390351ccb Gitweb: https://git.kernel.org/tip/95bb7c42ac8a94ce3d0eb059ad64430390351ccb Author:Sean Christopherson AuthorDate:Fri, 13 Nov 2020 00:01:21 +02:00 Committer: Borislav Petkov CommitterDate: Tue, 17 Nov 2020 14:36:14 +01:00 mm: Add 'mprotect' hook to struct vm_operations_struct Background == 1. SGX enclave pages are populated with data by copying from normal memory via ioctl() (SGX_IOC_ENCLAVE_ADD_PAGES), which will be added later in this series. 2. It is desirable to be able to restrict those normal memory data sources. For instance, to ensure that the source data is executable before copying data to an executable enclave page. 3. Enclave page permissions are dynamic (just like normal permissions) and can be adjusted at runtime with mprotect(). This creates a problem because the original data source may have long since vanished at the time when enclave page permissions are established (mmap() or mprotect()). The solution (elsewhere in this series) is to force enclave creators to declare their paging permission *intent* up front to the ioctl(). This intent can be immediately compared to the source data’s mapping and rejected if necessary. The “intent” is also stashed off for later comparison with enclave PTEs. This ensures that any future mmap()/mprotect() operations performed by the enclave creator or done on behalf of the enclave can be compared with the earlier declared permissions. Problem === There is an existing mmap() hook which allows SGX to perform this permission comparison at mmap() time. However, there is no corresponding ->mprotect() hook. Solution Add a vm_ops->mprotect() hook so that mprotect() operations which are inconsistent with any page's stashed intent can be rejected by the driver. Signed-off-by: Sean Christopherson Co-developed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Acked-by: Dave Hansen Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: linux...@kvack.org Link: https://lkml.kernel.org/r/20201112220135.165028-11-jar...@kernel.org --- include/linux/mm.h | 7 +++ mm/mprotect.c | 7 +++ 2 files changed, 14 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d..1813fa8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -559,6 +559,13 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); int (*split)(struct vm_area_struct * area, unsigned long addr); int (*mremap)(struct vm_area_struct * area); + /* +* Called by mprotect() to make driver-specific permission +* checks before mprotect() is finalised. The VMA must not +* be modified. Returns 0 if eprotect() can proceed. +*/ + int (*mprotect)(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags); vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); diff --git a/mm/mprotect.c b/mm/mprotect.c index 56c02be..ab70902 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -616,9 +616,16 @@ static int do_mprotect_pkey(unsigned long start, size_t len, tmp = vma->vm_end; if (tmp > end) tmp = end; + + if (vma->vm_ops && vma->vm_ops->mprotect) + error = vma->vm_ops->mprotect(vma, nstart, tmp, newflags); + if (error) + goto out; + error = mprotect_fixup(vma, , nstart, tmp, newflags); if (error) goto out; + nstart = tmp; if (nstart < prev->vm_end)
[tip: x86/sgx] x86/cpu/intel: Detect SGX support
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 224ab3527f89f69ae57dc53555826667ac46a3cc Gitweb: https://git.kernel.org/tip/224ab3527f89f69ae57dc53555826667ac46a3cc Author:Sean Christopherson AuthorDate:Fri, 13 Nov 2020 00:01:18 +02:00 Committer: Borislav Petkov CommitterDate: Tue, 17 Nov 2020 14:36:13 +01:00 x86/cpu/intel: Detect SGX support Kernel support for SGX is ultimately decided by the state of the launch control bits in the feature control MSR (MSR_IA32_FEAT_CTL). If the hardware supports SGX, but neglects to support flexible launch control, the kernel will not enable SGX. Enable SGX at feature control MSR initialization and update the associated X86_FEATURE flags accordingly. Disable X86_FEATURE_SGX (and all derivatives) if the kernel is not able to establish itself as the authority over SGX Launch Control. All checks are performed for each logical CPU (not just boot CPU) in order to verify that MSR_IA32_FEATURE_CONTROL is correctly configured on all CPUs. All SGX code in this series expects the same configuration from all CPUs. This differs from VMX where X86_FEATURE_VMX is intentionally cleared only for the current CPU so that KVM can provide additional information if KVM fails to load like which CPU doesn't support VMX. There’s not much the kernel or an administrator can do to fix the situation, so SGX neglects to convey additional details about these kinds of failures if they occur. Signed-off-by: Sean Christopherson Co-developed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Link: https://lkml.kernel.org/r/20201112220135.165028-8-jar...@kernel.org --- arch/x86/kernel/cpu/feat_ctl.c | 29 - 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index 29a3bed..d38e973 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -93,16 +93,32 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c) } #endif /* CONFIG_X86_VMX_FEATURE_NAMES */ +static void clear_sgx_caps(void) +{ + setup_clear_cpu_cap(X86_FEATURE_SGX); + setup_clear_cpu_cap(X86_FEATURE_SGX_LC); +} + void init_ia32_feat_ctl(struct cpuinfo_x86 *c) { bool tboot = tboot_enabled(); + bool enable_sgx; u64 msr; if (rdmsrl_safe(MSR_IA32_FEAT_CTL, )) { clear_cpu_cap(c, X86_FEATURE_VMX); + clear_sgx_caps(); return; } + /* +* Enable SGX if and only if the kernel supports SGX and Launch Control +* is supported, i.e. disable SGX if the LE hash MSRs can't be written. +*/ + enable_sgx = cpu_has(c, X86_FEATURE_SGX) && +cpu_has(c, X86_FEATURE_SGX_LC) && +IS_ENABLED(CONFIG_X86_SGX); + if (msr & FEAT_CTL_LOCKED) goto update_caps; @@ -124,13 +140,16 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c) msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX; } + if (enable_sgx) + msr |= FEAT_CTL_SGX_ENABLED | FEAT_CTL_SGX_LC_ENABLED; + wrmsrl(MSR_IA32_FEAT_CTL, msr); update_caps: set_cpu_cap(c, X86_FEATURE_MSR_IA32_FEAT_CTL); if (!cpu_has(c, X86_FEATURE_VMX)) - return; + goto update_sgx; if ( (tboot && !(msr & FEAT_CTL_VMX_ENABLED_INSIDE_SMX)) || (!tboot && !(msr & FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX))) { @@ -143,4 +162,12 @@ update_caps: init_vmx_capabilities(c); #endif } + +update_sgx: + if (!(msr & FEAT_CTL_SGX_ENABLED) || + !(msr & FEAT_CTL_SGX_LC_ENABLED) || !enable_sgx) { + if (enable_sgx) + pr_err_once("SGX disabled by BIOS\n"); + clear_sgx_caps(); + } }
[tip: x86/sgx] x86/mm: Signal SIGSEGV with PF_SGX
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 74faeee06db81a06add0def6a394210c8fef0ab7 Gitweb: https://git.kernel.org/tip/74faeee06db81a06add0def6a394210c8fef0ab7 Author:Sean Christopherson AuthorDate:Fri, 13 Nov 2020 00:01:17 +02:00 Committer: Borislav Petkov CommitterDate: Tue, 17 Nov 2020 14:36:13 +01:00 x86/mm: Signal SIGSEGV with PF_SGX The x86 architecture has a set of page fault error codes. These indicate things like whether the fault occurred from a write, or whether it originated in userspace. The SGX hardware architecture has its own per-page memory management metadata (EPCM) [*] and hardware which is separate from the normal x86 MMU. The architecture has a new page fault error code: PF_SGX. This new error code bit is set whenever a page fault occurs as the result of the SGX MMU. These faults occur for a variety of reasons. For instance, an access attempt to enclave memory from outside the enclave causes a PF_SGX fault. PF_SGX would also be set for permission conflicts, such as if a write to an enclave page occurs and the page is marked read-write in the x86 page tables but is read-only in the EPCM. These faults do not always indicate errors, though. SGX pages are encrypted with a key that is destroyed at hardware reset, including suspend. Throwing a SIGSEGV allows user space software to react and recover when these events occur. Include PF_SGX in the PF error codes list and throw SIGSEGV when it is encountered. [*] Intel SDM: 36.5.1 Enclave Page Cache Map (EPCM) [ bp: Add bit 15 to the comment above enum x86_pf_error_code too. ] Signed-off-by: Sean Christopherson Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Link: https://lkml.kernel.org/r/20201112220135.165028-7-jar...@kernel.org --- arch/x86/include/asm/trap_pf.h | 2 ++ arch/x86/mm/fault.c| 12 2 files changed, 14 insertions(+) diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h index 305bc12..10b1de5 100644 --- a/arch/x86/include/asm/trap_pf.h +++ b/arch/x86/include/asm/trap_pf.h @@ -11,6 +11,7 @@ * bit 3 == 1: use of reserved bit detected * bit 4 == 1: fault was an instruction fetch * bit 5 == 1: protection keys block access + * bit 15 == 1: SGX MMU page-fault */ enum x86_pf_error_code { X86_PF_PROT = 1 << 0, @@ -19,6 +20,7 @@ enum x86_pf_error_code { X86_PF_RSVD = 1 << 3, X86_PF_INSTR= 1 << 4, X86_PF_PK = 1 << 5, + X86_PF_SGX = 1 << 15, }; #endif /* _ASM_X86_TRAP_PF_H */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 82bf37a..9339fee 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1102,6 +1102,18 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) return 1; /* +* SGX hardware blocked the access. This usually happens +* when the enclave memory contents have been destroyed, like +* after a suspend/resume cycle. In any case, the kernel can't +* fix the cause of the fault. Handle the fault as an access +* error even in cases where no actual access violation +* occurred. This allows userspace to rebuild the enclave in +* response to the signal. +*/ + if (unlikely(error_code & X86_PF_SGX)) + return 1; + + /* * Make sure to check the VMA so that we do not perform * faults just to hit a X86_PF_PK as soon as we fill in a * page.
[tip: x86/sgx] x86/{cpufeatures,msr}: Add Intel SGX Launch Control hardware bits
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: d205e0f1426e0f99e2b4f387c49f2d8b66e129dd Gitweb: https://git.kernel.org/tip/d205e0f1426e0f99e2b4f387c49f2d8b66e129dd Author:Sean Christopherson AuthorDate:Fri, 13 Nov 2020 00:01:15 +02:00 Committer: Borislav Petkov CommitterDate: Tue, 17 Nov 2020 14:36:13 +01:00 x86/{cpufeatures,msr}: Add Intel SGX Launch Control hardware bits The SGX Launch Control hardware helps restrict which enclaves the hardware will run. Launch control is intended to restrict what software can run with enclave protections, which helps protect the overall system from bad enclaves. For the kernel's purposes, there are effectively two modes in which the launch control hardware can operate: rigid and flexible. In its rigid mode, an entity other than the kernel has ultimate authority over which enclaves can be run (firmware, Intel, etc...). In its flexible mode, the kernel has ultimate authority over which enclaves can run. Enable X86_FEATURE_SGX_LC to enumerate when the CPU supports SGX Launch Control in general. Add MSR_IA32_SGXLEPUBKEYHASH{0, 1, 2, 3}, which when combined contain a SHA256 hash of a 3072-bit RSA public key. The hardware allows SGX enclaves signed with this public key to initialize and run [*]. Enclaves not signed with this key can not initialize and run. Add FEAT_CTL_SGX_LC_ENABLED, which informs whether the SGXLEPUBKEYHASH MSRs can be written by the kernel. If the MSRs do not exist or are read-only, the launch control hardware is operating in rigid mode. Linux does not and will not support creating enclaves when hardware is configured in rigid mode because it takes away the authority for launch decisions from the kernel. Note, this does not preclude KVM from virtualizing/exposing SGX to a KVM guest when launch control hardware is operating in rigid mode. [*] Intel SDM: 38.1.4 Intel SGX Launch Control Configuration Signed-off-by: Sean Christopherson Co-developed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Link: https://lkml.kernel.org/r/20201112220135.165028-5-jar...@kernel.org --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 7 +++ 2 files changed, 8 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1181f5c..f5ef2d5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -357,6 +357,7 @@ #define X86_FEATURE_MOVDIRI(16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ #define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x8007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 258d555..d0c6cff 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -609,6 +609,7 @@ #define FEAT_CTL_LOCKEDBIT(0) #define FEAT_CTL_VMX_ENABLED_INSIDE_SMXBIT(1) #define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_SGX_LC_ENABLEDBIT(17) #define FEAT_CTL_SGX_ENABLED BIT(18) #define FEAT_CTL_LMCE_ENABLED BIT(20) @@ -629,6 +630,12 @@ #define MSR_IA32_UCODE_WRITE 0x0079 #define MSR_IA32_UCODE_REV 0x008b +/* Intel SGX Launch Enclave Public Key Hash MSRs */ +#define MSR_IA32_SGXLEPUBKEYHASH0 0x008C +#define MSR_IA32_SGXLEPUBKEYHASH1 0x008D +#define MSR_IA32_SGXLEPUBKEYHASH2 0x008E +#define MSR_IA32_SGXLEPUBKEYHASH3 0x008F + #define MSR_IA32_SMM_MONITOR_CTL 0x009b #define MSR_IA32_SMBASE0x009e
[tip: x86/sgx] x86/cpufeatures: Add Intel SGX hardware bits
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: e7b6385b01d8e9fb7a97887c3ea649abb95bb8c8 Gitweb: https://git.kernel.org/tip/e7b6385b01d8e9fb7a97887c3ea649abb95bb8c8 Author:Sean Christopherson AuthorDate:Fri, 13 Nov 2020 00:01:14 +02:00 Committer: Borislav Petkov CommitterDate: Tue, 17 Nov 2020 14:36:13 +01:00 x86/cpufeatures: Add Intel SGX hardware bits Populate X86_FEATURE_SGX feature from CPUID and tie it to the Kconfig option with disabled-features.h. IA32_FEATURE_CONTROL.SGX_ENABLE must be examined in addition to the CPUID bits to enable full SGX support. The BIOS must both set this bit and lock IA32_FEATURE_CONTROL for SGX to be supported (Intel SDM section 36.7.1). The setting or clearing of this bit has no impact on the CPUID bits above, which is why it needs to be detected separately. Signed-off-by: Sean Christopherson Co-developed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Link: https://lkml.kernel.org/r/20201112220135.165028-4-jar...@kernel.org --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 8 +++- arch/x86/include/asm/msr-index.h | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dad350d..1181f5c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -241,6 +241,7 @@ /* Intel-defined CPU features, CPUID level 0x0007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ #define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX( 9*32+ 2) /* Software Guard Extensions */ #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ #define X86_FEATURE_HLE( 9*32+ 4) /* Hardware Lock Elision */ #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 5861d34..7947cb1 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -62,6 +62,12 @@ # define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) #endif +#ifdef CONFIG_X86_SGX +# define DISABLE_SGX 0 +#else +# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -74,7 +80,7 @@ #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_SMAP) +#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) #define DISABLED_MASK100 #define DISABLED_MASK110 #define DISABLED_MASK120 diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 972a34d..258d555 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -609,6 +609,7 @@ #define FEAT_CTL_LOCKEDBIT(0) #define FEAT_CTL_VMX_ENABLED_INSIDE_SMXBIT(1) #define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_SGX_ENABLED BIT(18) #define FEAT_CTL_LMCE_ENABLED BIT(20) #define MSR_IA32_TSC_ADJUST 0x003b
[tip: x86/sgx] x86/sgx: Initialize metadata for Enclave Page Cache (EPC) sections
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: e7e0545299d8cb0fd6fe3ba50401b7f5c3937362 Gitweb: https://git.kernel.org/tip/e7e0545299d8cb0fd6fe3ba50401b7f5c3937362 Author:Sean Christopherson AuthorDate:Fri, 13 Nov 2020 00:01:16 +02:00 Committer: Borislav Petkov CommitterDate: Tue, 17 Nov 2020 14:36:13 +01:00 x86/sgx: Initialize metadata for Enclave Page Cache (EPC) sections Although carved out of normal DRAM, enclave memory is marked in the system memory map as reserved and is not managed by the core mm. There may be several regions spread across the system. Each contiguous region is called an Enclave Page Cache (EPC) section. EPC sections are enumerated via CPUID Enclave pages can only be accessed when they are mapped as part of an enclave, by a hardware thread running inside the enclave. Parse CPUID data, create metadata for EPC pages and populate a simple EPC page allocator. Although much smaller, ‘struct sgx_epc_page’ metadata is the SGX analog of the core mm ‘struct page’. Similar to how the core mm’s page->flags encode zone and NUMA information, embed the EPC section index to the first eight bits of sgx_epc_page->desc. This allows a quick reverse lookup from EPC page to EPC section. Existing client hardware supports only a single section, while upcoming server hardware will support at most eight sections. Thus, eight bits should be enough for long term needs. Signed-off-by: Sean Christopherson Co-developed-by: Serge Ayoun Signed-off-by: Serge Ayoun Co-developed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Link: https://lkml.kernel.org/r/20201112220135.165028-6-jar...@kernel.org --- arch/x86/Kconfig | 17 +++- arch/x86/kernel/cpu/Makefile | 1 +- arch/x86/kernel/cpu/sgx/Makefile | 2 +- arch/x86/kernel/cpu/sgx/main.c | 190 ++- arch/x86/kernel/cpu/sgx/sgx.h| 60 +- 5 files changed, 270 insertions(+) create mode 100644 arch/x86/kernel/cpu/sgx/Makefile create mode 100644 arch/x86/kernel/cpu/sgx/main.c create mode 100644 arch/x86/kernel/cpu/sgx/sgx.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f6946b8..618d1aa 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1930,6 +1930,23 @@ config X86_INTEL_TSX_MODE_AUTO side channel attacks- equals the tsx=auto command line parameter. endchoice +config X86_SGX + bool "Software Guard eXtensions (SGX)" + depends on X86_64 && CPU_SUP_INTEL + depends on CRYPTO=y + depends on CRYPTO_SHA256=y + select SRCU + select MMU_NOTIFIER + help + Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions + that can be used by applications to set aside private regions of code + and data, referred to as enclaves. An enclave's private memory can + only be accessed by code running within the enclave. Accesses from + outside the enclave, including other enclaves, are disallowed by + hardware. + + If unsure, say N. + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 93792b4..637b499 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_X86_MCE) += mce/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE)+= microcode/ obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/ +obj-$(CONFIG_X86_SGX) += sgx/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile new file mode 100644 index 000..79510ce --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/Makefile @@ -0,0 +1,2 @@ +obj-y += \ + main.o diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c new file mode 100644 index 000..187a237 --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016-20 Intel Corporation. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "encls.h" + +struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS]; +static int sgx_nr_epc_sections; +static struct task_struct *ksgxd_tsk; + +/* + * Reset dirty EPC pages to uninitialized state. Laundry can be left with SECS + * pages whose child pages blocked EREMOVE. + */ +static void sgx_sanitize_section(struct sgx_epc_section *section) +{ + struct sgx_epc_page *page; + LIST_HEAD(dirty); + int ret; + + while (!list_empty(>laundry_list)) { + if (kthread_should_stop()) + return; + + spin_lock(>lock); + + page = list_first_entry(>laundry_list, +
[tip: x86/sgx] x86/fault: Add a helper function to sanitize error code
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: cd072dab453a9b4a9f7927f9eddca5a156fbd87d Gitweb: https://git.kernel.org/tip/cd072dab453a9b4a9f7927f9eddca5a156fbd87d Author:Sean Christopherson AuthorDate:Fri, 13 Nov 2020 00:01:28 +02:00 Committer: Borislav Petkov CommitterDate: Wed, 18 Nov 2020 18:02:50 +01:00 x86/fault: Add a helper function to sanitize error code vDSO exception fixup is a replacement for signals in limited situations. Signals and vDSO exception fixup need to provide similar information to userspace, including the hardware error code. That hardware error code needs to be sanitized. For instance, if userspace accesses a kernel address, the error code could indicate to userspace whether the address had a Present=1 PTE. That can leak information about the kernel layout to userspace, which is bad. The existing signal code does this sanitization, but fairly late in the signal process. The vDSO exception code runs before the sanitization happens. Move error code sanitization out of the signal code and into a helper. Call the helper in the signal code. Signed-off-by: Sean Christopherson Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Link: https://lkml.kernel.org/r/20201112220135.165028-18-jar...@kernel.org --- arch/x86/mm/fault.c | 26 ++ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9339fee..0161d4a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -602,11 +602,9 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code, oops_end(flags, regs, sig); } -static void set_signal_archinfo(unsigned long address, - unsigned long error_code) +static void sanitize_error_code(unsigned long address, + unsigned long *error_code) { - struct task_struct *tsk = current; - /* * To avoid leaking information about the kernel page * table layout, pretend that user-mode accesses to @@ -617,7 +615,13 @@ static void set_signal_archinfo(unsigned long address, * information and does not appear to cause any problems. */ if (address >= TASK_SIZE_MAX) - error_code |= X86_PF_PROT; + *error_code |= X86_PF_PROT; +} + +static void set_signal_archinfo(unsigned long address, + unsigned long error_code) +{ + struct task_struct *tsk = current; tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | X86_PF_USER; @@ -658,6 +662,8 @@ no_context(struct pt_regs *regs, unsigned long error_code, * faulting through the emulate_vsyscall() logic. */ if (current->thread.sig_on_uaccess_err && signal) { + sanitize_error_code(address, _code); + set_signal_archinfo(address, error_code); /* XXX: hwpoison faults will set the wrong code. */ @@ -806,13 +812,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (is_errata100(regs, address)) return; - /* -* To avoid leaking information about the kernel page table -* layout, pretend that user-mode accesses to kernel addresses -* are always protection faults. -*/ - if (address >= TASK_SIZE_MAX) - error_code |= X86_PF_PROT; + sanitize_error_code(address, _code); if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); @@ -931,6 +931,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, if (is_prefetch(regs, error_code, address)) return; + sanitize_error_code(address, _code); + set_signal_archinfo(address, error_code); #ifdef CONFIG_MEMORY_FAILURE
[tip: x86/sgx] x86/traps: Attempt to fixup exceptions in vDSO before signaling
The following commit has been merged into the x86/sgx branch of tip: Commit-ID: 334872a0919890a70cccd00b8e11931020a819be Gitweb: https://git.kernel.org/tip/334872a0919890a70cccd00b8e11931020a819be Author:Sean Christopherson AuthorDate:Fri, 13 Nov 2020 00:01:29 +02:00 Committer: Borislav Petkov CommitterDate: Wed, 18 Nov 2020 18:02:50 +01:00 x86/traps: Attempt to fixup exceptions in vDSO before signaling vDSO functions can now leverage an exception fixup mechanism similar to kernel exception fixup. For vDSO exception fixup, the initial user is Intel's Software Guard Extensions (SGX), which will wrap the low-level transitions to/from the enclave, i.e. EENTER and ERESUME instructions, in a vDSO function and leverage fixup to intercept exceptions that would otherwise generate a signal. This allows the vDSO wrapper to return the fault information directly to its caller, obviating the need for SGX applications and libraries to juggle signal handlers. Attempt to fixup vDSO exceptions immediately prior to populating and sending signal information. Except for the delivery mechanism, an exception in a vDSO function should be treated like any other exception in userspace, e.g. any fault that is successfully handled by the kernel should not be directly visible to userspace. Although it's debatable whether or not all exceptions are of interest to enclaves, defer to the vDSO fixup to decide whether to do fixup or generate a signal. Future users of vDSO fixup, if there ever are any, will undoubtedly have different requirements than SGX enclaves, e.g. the fixup vs. signal logic can be made function specific if/when necessary. Suggested-by: Andy Lutomirski Signed-off-by: Sean Christopherson Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Link: https://lkml.kernel.org/r/20201112220135.165028-19-jar...@kernel.org --- arch/x86/kernel/traps.c | 10 ++ arch/x86/mm/fault.c | 7 +++ 2 files changed, 17 insertions(+) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index e19df6c..7798d86 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -60,6 +60,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -117,6 +118,9 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; die(str, regs, error_code); + } else { + if (fixup_vdso_exception(regs, trapnr, error_code, 0)) + return 0; } /* @@ -550,6 +554,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; + if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0)) + return; + show_signal(tsk, SIGSEGV, "", desc, regs, error_code); force_sig(SIGSEGV); goto exit; @@ -1048,6 +1055,9 @@ static void math_error(struct pt_regs *regs, int trapnr) if (!si_code) goto exit; + if (fixup_vdso_exception(regs, trapnr, 0, 0)) + return; + force_sig_fault(SIGFPE, si_code, (void __user *)uprobe_get_trap_addr(regs)); exit: diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0161d4a..f1f1b5a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -30,6 +30,7 @@ #include /* exception stack */ #include /* VMALLOC_START, ... */ #include /* kvm_handle_async_pf */ +#include /* fixup_vdso_exception() */ #define CREATE_TRACE_POINTS #include @@ -814,6 +815,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, sanitize_error_code(address, _code); + if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) + return; + if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); @@ -933,6 +937,9 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, sanitize_error_code(address, _code); + if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) + return; + set_signal_archinfo(address, error_code); #ifdef CONFIG_MEMORY_FAILURE
[tip: x86/urgent] x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM
The following commit has been merged into the x86/urgent branch of tip: Commit-ID: 6a3ea3e68b8a8a26c4aaac03432ed92269c9a14e Gitweb: https://git.kernel.org/tip/6a3ea3e68b8a8a26c4aaac03432ed92269c9a14e Author:Sean Christopherson AuthorDate:Fri, 21 Aug 2020 06:52:29 -04:00 Committer: Thomas Gleixner CommitterDate: Fri, 21 Aug 2020 16:15:27 +02:00 x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM KVM has an optmization to avoid expensive MRS read/writes on VMENTER/EXIT. It caches the MSR values and restores them either when leaving the run loop, on preemption or when going out to user space. The affected MSRs are not required for kernel context operations. This changed with the recently introduced mechanism to handle FSGSBASE in the paranoid entry code which has to retrieve the kernel GSBASE value by accessing per CPU memory. The mechanism needs to retrieve the CPU number and uses either LSL or RDPID if the processor supports it. Unfortunately RDPID uses MSR_TSC_AUX which is in the list of cached and lazily restored MSRs, which means between the point where the guest value is written and the point of restore, MSR_TSC_AUX contains a random number. If an NMI or any other exception which uses the paranoid entry path happens in such a context, then RDPID returns the random guest MSR_TSC_AUX value. As a consequence this reads from the wrong memory location to retrieve the kernel GSBASE value. Kernel GS is used to for all regular this_cpu_*() operations. If the GSBASE in the exception handler points to the per CPU memory of a different CPU then this has the obvious consequences of data corruption and crashes. As the paranoid entry path is the only place which accesses MSR_TSX_AUX (via RDPID) and the fallback via LSL is not significantly slower, remove the RDPID alternative from the entry path and always use LSL. The alternative would be to write MSR_TSC_AUX on every VMENTER and VMEXIT which would be inflicting massive overhead on that code path. [ tglx: Rewrote changelog ] Fixes: eaad981291ee3 ("x86/entry/64: Introduce the FIND_PERCPU_BASE macro") Reported-by: Tom Lendacky Debugged-by: Tom Lendacky Suggested-by: Andy Lutomirski Suggested-by: Peter Zijlstra Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200821105229.18938-1-pbonz...@redhat.com --- arch/x86/entry/calling.h | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 98e4d88..ae9b0d4 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -374,12 +374,14 @@ For 32-bit we have the following conventions - kernel is built with * Fetch the per-CPU GSBASE value for this processor and put it in @reg. * We normally use %gs for accessing per-CPU data, but we are setting up * %gs here and obviously can not use %gs itself to access per-CPU data. + * + * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and + * may not restore the host's value until the CPU returns to userspace. + * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives + * while running KVM's run loop. */ .macro GET_PERCPU_BASE reg:req - ALTERNATIVE \ - "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \ - "RDPID \reg", \ - X86_FEATURE_RDPID + LOAD_CPU_AND_NODE_SEG_LIMIT \reg andq$VDSO_CPUNODE_MASK, \reg movq__per_cpu_offset(, \reg, 8), \reg .endm
[tip: x86/urgent] x86/split_lock: Don't write MSR_TEST_CTRL on CPUs that aren't whitelisted
The following commit has been merged into the x86/urgent branch of tip: Commit-ID: 009bce1df0bb5eb970b9eb98d963861f7fe353c7 Gitweb: https://git.kernel.org/tip/009bce1df0bb5eb970b9eb98d963861f7fe353c7 Author:Sean Christopherson AuthorDate:Fri, 05 Jun 2020 12:26:05 -07:00 Committer: Thomas Gleixner CommitterDate: Tue, 30 Jun 2020 14:09:31 +02:00 x86/split_lock: Don't write MSR_TEST_CTRL on CPUs that aren't whitelisted Choo! Choo! All aboard the Split Lock Express, with direct service to Wreckage! Skip split_lock_verify_msr() if the CPU isn't whitelisted as a possible SLD-enabled CPU model to avoid writing MSR_TEST_CTRL. MSR_TEST_CTRL exists, and is writable, on many generations of CPUs. Writing the MSR, even with '0', can result in bizarre, undocumented behavior. This fixes a crash on Haswell when resuming from suspend with a live KVM guest. Because APs use the standard SMP boot flow for resume, they will go through split_lock_init() and the subsequent RDMSR/WRMSR sequence, which runs even when sld_state==sld_off to ensure SLD is disabled. On Haswell (at least, my Haswell), writing MSR_TEST_CTRL with '0' will succeed and _may_ take the SMT _sibling_ out of VMX root mode. When KVM has an active guest, KVM performs VMXON as part of CPU onlining (see kvm_starting_cpu()). Because SMP boot is serialized, the resulting flow is effectively: on_each_ap_cpu() { WRMSR(MSR_TEST_CTRL, 0) VMXON } As a result, the WRMSR can disable VMX on a different CPU that has already done VMXON. This ultimately results in a #UD on VMPTRLD when KVM regains control and attempt run its vCPUs. The above voodoo was confirmed by reworking KVM's VMXON flow to write MSR_TEST_CTRL prior to VMXON, and to serialize the sequence as above. Further verification of the insanity was done by redoing VMXON on all APs after the initial WRMSR->VMXON sequence. The additional VMXON, which should VM-Fail, occasionally succeeded, and also eliminated the unexpected #UD on VMPTRLD. The damage done by writing MSR_TEST_CTRL doesn't appear to be limited to VMX, e.g. after suspend with an active KVM guest, subsequent reboots almost always hang (even when fudging VMXON), a #UD on a random Jcc was observed, suspend/resume stability is qualitatively poor, and so on and so forth. kernel BUG at arch/x86/kvm/x86.c:386! CPU: 1 PID: 2592 Comm: CPU 6/KVM Tainted: G D Hardware name: ASUS Q87M-E/Q87M-E, BIOS 1102 03/03/2014 RIP: 0010:kvm_spurious_fault+0xf/0x20 Call Trace: vmx_vcpu_load_vmcs+0x1fb/0x2b0 vmx_vcpu_load+0x3e/0x160 kvm_arch_vcpu_load+0x48/0x260 finish_task_switch+0x140/0x260 __schedule+0x460/0x720 _cond_resched+0x2d/0x40 kvm_arch_vcpu_ioctl_run+0x82e/0x1ca0 kvm_vcpu_ioctl+0x363/0x5c0 ksys_ioctl+0x88/0xa0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x4c/0x170 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: dbaba47085b0c ("x86/split_lock: Rework the initialization flow of split lock detection") Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Cc: sta...@vger.kernel.org Link: https://lkml.kernel.org/r/20200605192605.7439-1-sean.j.christopher...@intel.com --- arch/x86/kernel/cpu/intel.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c25a67a..0ab48f1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -50,6 +50,13 @@ static enum split_lock_detect_state sld_state __ro_after_init = sld_off; static u64 msr_test_ctrl_cache __ro_after_init; /* + * With a name like MSR_TEST_CTL it should go without saying, but don't touch + * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it + * on CPUs that do not support SLD can cause fireworks, even when writing '0'. + */ +static bool cpu_model_supports_sld __ro_after_init; + +/* * Processors which have self-snooping capability can handle conflicting * memory type across CPUs by snooping its own cache. However, there exists * CPU models in which having conflicting memory types still leads to @@ -1071,7 +1078,8 @@ static void sld_update_msr(bool on) static void split_lock_init(void) { - split_lock_verify_msr(sld_state != sld_off); + if (cpu_model_supports_sld) + split_lock_verify_msr(sld_state != sld_off); } static void split_lock_warn(unsigned long ip) @@ -1177,5 +1185,6 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) return; } + cpu_model_supports_sld = true; split_lock_setup(); }
[tip: x86/urgent] x86/cpu: Reinitialize IA32_FEAT_CTL MSR on BSP during wakeup
The following commit has been merged into the x86/urgent branch of tip: Commit-ID: 5d5103595e9e53048bb7e70ee2673c897ab38300 Gitweb: https://git.kernel.org/tip/5d5103595e9e53048bb7e70ee2673c897ab38300 Author:Sean Christopherson AuthorDate:Mon, 08 Jun 2020 10:41:34 -07:00 Committer: Borislav Petkov CommitterDate: Mon, 15 Jun 2020 14:18:37 +02:00 x86/cpu: Reinitialize IA32_FEAT_CTL MSR on BSP during wakeup Reinitialize IA32_FEAT_CTL on the BSP during wakeup to handle the case where firmware doesn't initialize or save/restore across S3. This fixes a bug where IA32_FEAT_CTL is left uninitialized and results in VMXON taking a #GP due to VMX not being fully enabled, i.e. breaks KVM. Use init_ia32_feat_ctl() to "restore" IA32_FEAT_CTL as it already deals with the case where the MSR is locked, and because APs already redo init_ia32_feat_ctl() during suspend by virtue of the SMP boot flow being used to reinitialize APs upon wakeup. Do the call in the early wakeup flow to avoid dependencies in the syscore_ops chain, e.g. simply adding a resume hook is not guaranteed to work, as KVM does VMXON in its own resume hook, kvm_resume(), when KVM has active guests. Fixes: 21bd3467a58e ("KVM: VMX: Drop initialization of IA32_FEAT_CTL MSR") Reported-by: Brad Campbell Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov Reviewed-by: Liam Merwick Reviewed-by: Maxim Levitsky Tested-by: Brad Campbell Cc: sta...@vger.kernel.org # v5.6 Link: https://lkml.kernel.org/r/20200608174134.11157-1-sean.j.christopher...@intel.com --- arch/x86/include/asm/cpu.h| 5 + arch/x86/kernel/cpu/centaur.c | 1 + arch/x86/kernel/cpu/cpu.h | 4 arch/x86/kernel/cpu/zhaoxin.c | 1 + arch/x86/power/cpu.c | 6 ++ 5 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index dd17c2d..da78ccb 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -58,4 +58,9 @@ static inline bool handle_guest_split_lock(unsigned long ip) return false; } #endif +#ifdef CONFIG_IA32_FEAT_CTL +void init_ia32_feat_ctl(struct cpuinfo_x86 *c); +#else +static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} +#endif #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 4267925..c5cf336 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index fb538fc..9d03369 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -81,8 +81,4 @@ extern void update_srbds_msr(void); extern u64 x86_read_arch_cap_msr(void); -#ifdef CONFIG_IA32_FEAT_CTL -void init_ia32_feat_ctl(struct cpuinfo_x86 *c); -#endif - #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index df1358b..05fa4ef 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -2,6 +2,7 @@ #include #include +#include #include #include "cpu.h" diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 7c65102..db1378c 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -193,6 +193,8 @@ static void fix_processor_context(void) */ static void notrace __restore_processor_state(struct saved_context *ctxt) { + struct cpuinfo_x86 *c; + if (ctxt->misc_enable_saved) wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable); /* @@ -263,6 +265,10 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) mtrr_bp_restore(); perf_restore_debug_store(); msr_restore_context(ctxt); + + c = _data(smp_processor_id()); + if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL)) + init_ia32_feat_ctl(c); } /* Needed by apm.c */
[tip: x86/urgent] x86/apic/x2apic: Fix a NULL pointer deref when handling a dying cpu
The following commit has been merged into the x86/urgent branch of tip: Commit-ID: 7a22e03b0c02988e91003c505b34d752a51de344 Gitweb: https://git.kernel.org/tip/7a22e03b0c02988e91003c505b34d752a51de344 Author:Sean Christopherson AuthorDate:Tue, 01 Oct 2019 13:50:19 -07:00 Committer: Thomas Gleixner CommitterDate: Tue, 15 Oct 2019 10:57:09 +02:00 x86/apic/x2apic: Fix a NULL pointer deref when handling a dying cpu Check that the per-cpu cluster mask pointer has been set prior to clearing a dying cpu's bit. The per-cpu pointer is not set until the target cpu reaches smp_callin() during CPUHP_BRINGUP_CPU, whereas the teardown function, x2apic_dead_cpu(), is associated with the earlier CPUHP_X2APIC_PREPARE. If an error occurs before the cpu is awakened, e.g. if do_boot_cpu() itself fails, x2apic_dead_cpu() will dereference the NULL pointer and cause a panic. smpboot: do_boot_cpu failed(-22) to wakeup CPU#1 BUG: kernel NULL pointer dereference, address: 0008 RIP: 0010:x2apic_dead_cpu+0x1a/0x30 Call Trace: cpuhp_invoke_callback+0x9a/0x580 _cpu_up+0x10d/0x140 do_cpu_up+0x69/0xb0 smp_init+0x63/0xa9 kernel_init_freeable+0xd7/0x229 ? rest_init+0xa0/0xa0 kernel_init+0xa/0x100 ret_from_fork+0x35/0x40 Fixes: 023a611748fd5 ("x86/apic/x2apic: Simplify cluster management") Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Cc: sta...@vger.kernel.org Link: https://lkml.kernel.org/r/20191001205019.5789-1-sean.j.christopher...@intel.com --- arch/x86/kernel/apic/x2apic_cluster.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 45e92cb..b0889c4 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -156,7 +156,8 @@ static int x2apic_dead_cpu(unsigned int dead_cpu) { struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); - cpumask_clear_cpu(dead_cpu, >mask); + if (cmsk) + cpumask_clear_cpu(dead_cpu, >mask); free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; }
[tip: x86/urgent] x86/apic/x2apic: Fix a NULL pointer deref when handling a dying cpu
The following commit has been merged into the x86/urgent branch of tip: Commit-ID: 7a22e03b0c02988e91003c505b34d752a51de344 Gitweb: https://git.kernel.org/tip/7a22e03b0c02988e91003c505b34d752a51de344 Author:Sean Christopherson AuthorDate:Tue, 01 Oct 2019 13:50:19 -07:00 Committer: Thomas Gleixner CommitterDate: Tue, 15 Oct 2019 10:57:09 +02:00 x86/apic/x2apic: Fix a NULL pointer deref when handling a dying cpu Check that the per-cpu cluster mask pointer has been set prior to clearing a dying cpu's bit. The per-cpu pointer is not set until the target cpu reaches smp_callin() during CPUHP_BRINGUP_CPU, whereas the teardown function, x2apic_dead_cpu(), is associated with the earlier CPUHP_X2APIC_PREPARE. If an error occurs before the cpu is awakened, e.g. if do_boot_cpu() itself fails, x2apic_dead_cpu() will dereference the NULL pointer and cause a panic. smpboot: do_boot_cpu failed(-22) to wakeup CPU#1 BUG: kernel NULL pointer dereference, address: 0008 RIP: 0010:x2apic_dead_cpu+0x1a/0x30 Call Trace: cpuhp_invoke_callback+0x9a/0x580 _cpu_up+0x10d/0x140 do_cpu_up+0x69/0xb0 smp_init+0x63/0xa9 kernel_init_freeable+0xd7/0x229 ? rest_init+0xa0/0xa0 kernel_init+0xa/0x100 ret_from_fork+0x35/0x40 Fixes: 023a611748fd5 ("x86/apic/x2apic: Simplify cluster management") Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Cc: sta...@vger.kernel.org Link: https://lkml.kernel.org/r/20191001205019.5789-1-sean.j.christopher...@intel.com --- arch/x86/kernel/apic/x2apic_cluster.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 45e92cb..b0889c4 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -156,7 +156,8 @@ static int x2apic_dead_cpu(unsigned int dead_cpu) { struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); - cpumask_clear_cpu(dead_cpu, >mask); + if (cmsk) + cpumask_clear_cpu(dead_cpu, >mask); free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; }
[tip: x86/urgent] x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386
The following commit has been merged into the x86/urgent branch of tip: Commit-ID: b63f20a778c88b6a04458ed6ffc69da953d3a109 Gitweb: https://git.kernel.org/tip/b63f20a778c88b6a04458ed6ffc69da953d3a109 Author:Sean Christopherson AuthorDate:Thu, 22 Aug 2019 14:11:22 -07:00 Committer: Thomas Gleixner CommitterDate: Fri, 23 Aug 2019 17:38:13 +02:00 x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386 Use 'lea' instead of 'add' when adjusting %rsp in CALL_NOSPEC so as to avoid clobbering flags. KVM's emulator makes indirect calls into a jump table of sorts, where the destination of the CALL_NOSPEC is a small blob of code that performs fast emulation by executing the target instruction with fixed operands. adcb_al_dl: 0x000339f8 <+0>: adc%dl,%al 0x000339fa <+2>: ret A major motiviation for doing fast emulation is to leverage the CPU to handle consumption and manipulation of arithmetic flags, i.e. RFLAGS is both an input and output to the target of CALL_NOSPEC. Clobbering flags results in all sorts of incorrect emulation, e.g. Jcc instructions often take the wrong path. Sans the nops... asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" 0x0003595a <+58>: mov0xc0(%ebx),%eax 0x00035960 <+64>: mov0x60(%ebx),%edx 0x00035963 <+67>: mov0x90(%ebx),%ecx 0x00035969 <+73>: push %edi 0x0003596a <+74>: popf 0x0003596b <+75>: call *%esi 0x000359a0 <+128>: pushf 0x000359a1 <+129>: pop%edi 0x000359a2 <+130>: mov%eax,0xc0(%ebx) 0x000359b1 <+145>: mov%edx,0x60(%ebx) ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); 0x000359a8 <+136>: mov-0x10(%ebp),%eax 0x000359ab <+139>: and$0x8d5,%edi 0x000359b4 <+148>: and$0xf72a,%eax 0x000359b9 <+153>: or %eax,%edi 0x000359bd <+157>: mov%edi,0x4(%ebx) For the most part this has gone unnoticed as emulation of guest code that can trigger fast emulation is effectively limited to MMIO when running on modern hardware, and MMIO is rarely, if ever, accessed by instructions that affect or consume flags. Breakage is almost instantaneous when running with unrestricted guest disabled, in which case KVM must emulate all instructions when the guest has invalid state, e.g. when the guest is in Big Real Mode during early BIOS. Fixes: 776b043848fd2 ("x86/retpoline: Add initial retpoline support") Fixes: 1a29b5b7f347a ("KVM: x86: Make indirect calls in emulator speculation safe") Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: sta...@vger.kernel.org Link: https://lkml.kernel.org/r/20190822211122.27579-1-sean.j.christopher...@intel.com --- arch/x86/include/asm/nospec-branch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 109f974..80bc209 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -192,7 +192,7 @@ " lfence;\n" \ " jmp902b;\n" \ " .align 16\n"\ - "903: addl $4, %%esp;\n"\ + "903: lea4(%%esp), %%esp;\n" \ " pushl %[thunk_target];\n" \ " ret;\n" \ " .align 16\n"\