Linus, please pull the latest x86-asm-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-asm-for-linus

The following updates for 5.1 are available:

 - Add pinning of sensitive CR0/CR4 bits, i.e. WP and SMAP/SMEP, as
   these have been targets of recent exploits. This includes an lkdtm
   test.

 - Remove unused macros/inlines and obsolete __GNUC__ conditionals.

Thanks,

	tglx

------------------>
Kees Cook (4):
      x86/asm: Pin sensitive CR4 bits
      x86/asm: Avoid taking an exception before cr4 restore
      x86/asm: Pin sensitive CR0 bits
      lkdtm: Check for SMEP clearing protections

Rasmus Villemoes (2):
      x86/asm: Remove dead __GNUC__ conditionals
      x86/asm: Remove unused __constant_c_x_memset() macro and inlines


 arch/x86/include/asm/bitops.h        |   6 --
 arch/x86/include/asm/special_insns.h |  43 ++++++++++++++-
 arch/x86/include/asm/string_32.h     | 104 -----------------------------------
 arch/x86/include/asm/string_64.h     |  15 -----
 arch/x86/kernel/cpu/common.c         |  12 +++-
 drivers/misc/lkdtm/bugs.c            |  61 ++++++++++++++++++++
 drivers/misc/lkdtm/core.c            |   1 +
 drivers/misc/lkdtm/lkdtm.h           |   1 +
 8 files changed, 116 insertions(+), 127 deletions(-)

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index ad7b210aa3f6..d153d570bb04 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -36,13 +36,7 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */
 
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
-/* Technically wrong, but this avoids compilation errors on some gcc
-   versions. */
-#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
-#else
 #define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
-#endif
 
 #define ADDR				BITOP_ADDR(addr)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 43c029cdc3fe..7fa4fe880395 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -5,6 +5,7 @@
 
 #ifdef __KERNEL__
 
+#include <asm/processor-flags.h>
 #include <asm/nops.h>
 
 /*
@@ -25,7 +26,28 @@ static inline unsigned long native_read_cr0(void)
 
 static inline void native_write_cr0(unsigned long val)
 {
-	asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
+	bool warn = false;
+
+again:
+	val |= X86_CR0_WP;
+	/*
+	 * In order to have the compiler not optimize away the check
+	 * after the cr0 write, mark "val" as being also an output ("+r")
+	 * of this asm() block so the compiler will perform an explicit
+	 * check, as if it were "volatile".
+	 */
+	asm volatile("mov %0,%%cr0" : "+r" (val) : "m" (__force_order) : );
+	/*
+	 * If the MOV above was used directly as a ROP gadget we can
+	 * notice the lack of pinned bits in "val" and start the function
+	 * from the beginning to gain the WP bit for sure. And do it
+	 * without first taking the exception for a WARN().
+	 */
+	if ((val & X86_CR0_WP) != X86_CR0_WP) {
+		warn = true;
+		goto again;
+	}
+	WARN_ONCE(warn, "Attempt to unpin X86_CR0_WP, cr0 bypass attack?!\n");
 }
 
 static inline unsigned long native_read_cr2(void)
@@ -72,9 +94,28 @@ static inline unsigned long native_read_cr4(void)
 	return val;
 }
 
+extern volatile unsigned long cr4_pin;
+
 static inline void native_write_cr4(unsigned long val)
 {
+	unsigned long warn = 0;
+
+again:
+	val |= cr4_pin;
 	asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
+	/*
+	 * If the MOV above was used directly as a ROP gadget we can
+	 * notice the lack of pinned bits in "val" and start the function
+	 * from the beginning to gain the cr4_pin bits for sure. Note
+	 * that "val" must be volatile to keep the compiler from
+	 * optimizing away this check.
+	 */
+	if ((val & cr4_pin) != cr4_pin) {
+		warn = ~val & cr4_pin;
+		goto again;
+	}
+	WARN_ONCE(warn, "Attempt to unpin cr4 bits: %lx; bypass attack?!\n",
+		  warn);
 }
 
 #ifdef CONFIG_X86_64
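[ Note for readers: the pin/write/re-check/retry pattern above can be
  modelled outside the kernel. The sketch below is a minimal userspace
  illustration of the same idea; fake_reg, write_reg() and pinned_write()
  are made-up stand-ins, not kernel API: ]

#include <stdio.h>

#define PIN_WP	(1UL << 16)		/* stand-in for X86_CR0_WP */

static volatile unsigned long fake_reg;	/* stand-in for the register */

/*
 * Stand-in for the privileged MOV. An attacker jumping straight here
 * (the ROP case) skips the "val |= PIN_WP" step below.
 */
static void write_reg(unsigned long val)
{
	fake_reg = val;
}

static void pinned_write(unsigned long val)
{
	int retried = 0;

again:
	val |= PIN_WP;			/* force the pinned bit on */
	write_reg(val);
	/*
	 * Re-check after the write: if execution entered at write_reg()
	 * with the pin cleared, this fails and the loop restores it.
	 */
	if ((fake_reg & PIN_WP) != PIN_WP) {
		retried = 1;
		goto again;
	}
	if (retried)
		fprintf(stderr, "pinned bit had to be restored\n");
}

int main(void)
{
	pinned_write(0);			/* caller asked for the pin cleared */
	printf("reg = %#lx\n", fake_reg);	/* PIN_WP is still set */
	return 0;
}

[ The important property is that the check happens after the write, so a
  jump straight to the MOV is still caught and corrected, without first
  taking an exception for the WARN(). ]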
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 55d392c6bd29..f74362b05619 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -179,14 +179,7 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
  *	No 3D Now!
  */
 
-#if (__GNUC__ >= 4)
 #define memcpy(t, f, n) __builtin_memcpy(t, f, n)
-#else
-#define memcpy(t, f, n)				\
-	(__builtin_constant_p((n))		\
-	 ? __constant_memcpy((t), (f), (n))	\
-	 : __memcpy((t), (f), (n)))
-#endif
 
 #endif
 #endif /* !CONFIG_FORTIFY_SOURCE */
@@ -216,29 +209,6 @@ static inline void *__memset_generic(void *s, char c, size_t count)
 /* we might want to write optimized versions of these later */
 #define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count))
 
-/*
- * memset(x, 0, y) is a reasonably common thing to do, so we want to fill
- * things 32 bits at a time even when we don't know the size of the
- * area at compile-time..
- */
-static __always_inline
-void *__constant_c_memset(void *s, unsigned long c, size_t count)
-{
-	int d0, d1;
-	asm volatile("rep ; stosl\n\t"
-		     "testb $2,%b3\n\t"
-		     "je 1f\n\t"
-		     "stosw\n"
-		     "1:\ttestb $1,%b3\n\t"
-		     "je 2f\n\t"
-		     "stosb\n"
-		     "2:"
-		     : "=&c" (d0), "=&D" (d1)
-		     : "a" (c), "q" (count), "0" (count/4), "1" ((long)s)
-		     : "memory");
-	return s;
-}
-
 /* Added by Gertjan van Wingerde to make minix and sysv module work */
 #define __HAVE_ARCH_STRNLEN
 extern size_t strnlen(const char *s, size_t count);
@@ -247,72 +217,6 @@ extern size_t strnlen(const char *s, size_t count);
 #define __HAVE_ARCH_STRSTR
 extern char *strstr(const char *cs, const char *ct);
 
-/*
- * This looks horribly ugly, but the compiler can optimize it totally,
- * as we by now know that both pattern and count is constant..
- */
-static __always_inline
-void *__constant_c_and_count_memset(void *s, unsigned long pattern,
-				    size_t count)
-{
-	switch (count) {
-	case 0:
-		return s;
-	case 1:
-		*(unsigned char *)s = pattern & 0xff;
-		return s;
-	case 2:
-		*(unsigned short *)s = pattern & 0xffff;
-		return s;
-	case 3:
-		*(unsigned short *)s = pattern & 0xffff;
-		*((unsigned char *)s + 2) = pattern & 0xff;
-		return s;
-	case 4:
-		*(unsigned long *)s = pattern;
-		return s;
-	}
-
-#define COMMON(x)							\
-	asm volatile("rep ; stosl"					\
-		     x							\
-		     : "=&c" (d0), "=&D" (d1)				\
-		     : "a" (eax), "0" (count/4), "1" ((long)s)		\
-		     : "memory")
-
-	{
-		int d0, d1;
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 0
-		/* Workaround for broken gcc 4.0 */
-		register unsigned long eax asm("%eax") = pattern;
-#else
-		unsigned long eax = pattern;
-#endif
-
-		switch (count % 4) {
-		case 0:
-			COMMON("");
-			return s;
-		case 1:
-			COMMON("\n\tstosb");
-			return s;
-		case 2:
-			COMMON("\n\tstosw");
-			return s;
-		default:
-			COMMON("\n\tstosw\n\tstosb");
-			return s;
-		}
-	}
-
-#undef COMMON
-}
-
-#define __constant_c_x_memset(s, c, count)			\
-	(__builtin_constant_p(count)				\
-	 ? __constant_c_and_count_memset((s), (c), (count))	\
-	 : __constant_c_memset((s), (c), (count)))
-
 #define __memset(s, c, count)				\
 	(__builtin_constant_p(count)			\
 	 ? __constant_count_memset((s), (c), (count))	\
@@ -321,15 +225,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
 
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *, int, size_t);
 #ifndef CONFIG_FORTIFY_SOURCE
-#if (__GNUC__ >= 4)
 #define memset(s, c, count) __builtin_memset(s, c, count)
-#else
-#define memset(s, c, count)						\
-	(__builtin_constant_p(c)					\
-	 ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
-				 (count))				\
-	 : __memset((s), (c), (count)))
-#endif
 #endif /* !CONFIG_FORTIFY_SOURCE */
 
 #define __HAVE_ARCH_MEMSET16
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 4e4194e21a09..75314c3dbe47 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -14,21 +14,6 @@
 extern void *memcpy(void *to, const void *from, size_t len);
 extern void *__memcpy(void *to, const void *from, size_t len);
 
-#ifndef CONFIG_FORTIFY_SOURCE
-#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
-#define memcpy(dst, src, len)					\
-({								\
-	size_t __len = (len);					\
-	void *__ret;						\
-	if (__builtin_constant_p(len) && __len >= 64)		\
-		__ret = __memcpy((dst), (src), __len);		\
-	else							\
-		__ret = __builtin_memcpy((dst), (src), __len);	\
-	__ret;							\
-})
-#endif
-#endif /* !CONFIG_FORTIFY_SOURCE */
-
 #define __HAVE_ARCH_MEMSET
 void *memset(void *s, int c, size_t n);
 void *__memset(void *s, int c, size_t n);
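[ For completeness on the cleanup side: with every compiler the kernel
  still supports, the removed __GNUC__ branches were dead, so these
  memcpy()/memset() macros now always resolve to the compiler builtins.
  A trivial standalone userspace illustration, not kernel code: ]

#include <stdio.h>

int main(void)
{
#if __GNUC__ < 4
# error "no supported kernel compiler takes this branch"
#endif
	char buf[16];

	/* what the simplified string_32.h macros expand to */
	__builtin_memset(buf, 0, sizeof(buf));
	__builtin_memcpy(buf, "builtin", 8);
	printf("%s\n", buf);
	return 0;
}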
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb28e98a0659..7e0ea4470f8e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -312,10 +312,16 @@ static __init int setup_disable_smep(char *arg)
 }
 __setup("nosmep", setup_disable_smep);
 
+volatile unsigned long cr4_pin __ro_after_init;
+EXPORT_SYMBOL_GPL(cr4_pin);
+
 static __always_inline void setup_smep(struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_SMEP))
+	if (cpu_has(c, X86_FEATURE_SMEP)) {
+		if (!(cr4_pin & X86_CR4_SMEP))
+			cr4_pin |= X86_CR4_SMEP;
 		cr4_set_bits(X86_CR4_SMEP);
+	}
 }
 
 static __init int setup_disable_smap(char *arg)
@@ -334,6 +340,8 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_SMAP)) {
 #ifdef CONFIG_X86_SMAP
+		if (!(cr4_pin & X86_CR4_SMAP))
+			cr4_pin |= X86_CR4_SMAP;
 		cr4_set_bits(X86_CR4_SMAP);
 #else
 		cr4_clear_bits(X86_CR4_SMAP);
@@ -351,6 +359,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
 	if (!cpu_has(c, X86_FEATURE_UMIP))
 		goto out;
 
+	if (!(cr4_pin & X86_CR4_UMIP))
+		cr4_pin |= X86_CR4_UMIP;
 	cr4_set_bits(X86_CR4_UMIP);
 
 	pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n");
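[ The common.c side is deliberately simple: each setup_*() path ORs its
  bit into cr4_pin the first time the feature is enabled, and every later
  native_write_cr4() preserves the accumulated mask. A minimal userspace
  sketch of that accumulation; the bit values and helpers are made up,
  standing in for cr4_set_bits() and the real X86_CR4_* flags: ]

#include <stdio.h>

/* made-up bit values, standing in for X86_CR4_SMEP/SMAP/UMIP */
#define FEAT_SMEP	(1UL << 20)
#define FEAT_SMAP	(1UL << 21)
#define FEAT_UMIP	(1UL << 11)

static unsigned long cr4_pin;		/* __ro_after_init in the real code */
static unsigned long cr4_shadow;	/* stand-in for the register */

/* stand-in for cr4_set_bits(): turn a feature bit on */
static void cr4_set_bits(unsigned long mask)
{
	cr4_shadow |= mask;
}

/* mirrors the setup_smep()/setup_smap()/setup_umip() pattern */
static void setup_feature(unsigned long bit, int cpu_has_it)
{
	if (!cpu_has_it)
		return;
	if (!(cr4_pin & bit))	/* only the first CPU adds the pin */
		cr4_pin |= bit;
	cr4_set_bits(bit);
}

int main(void)
{
	setup_feature(FEAT_SMEP, 1);
	setup_feature(FEAT_SMAP, 1);
	setup_feature(FEAT_UMIP, 0);	/* not supported: never pinned */

	printf("pinned mask: %#lx, reg: %#lx\n", cr4_pin, cr4_shadow);
	return 0;
}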
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 7eebbdfbcacd..6176384b4f85 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -255,3 +255,64 @@ void lkdtm_STACK_GUARD_PAGE_TRAILING(void)
 
 	pr_err("FAIL: accessed page after stack!\n");
 }
+
+void lkdtm_UNSET_SMEP(void)
+{
+#ifdef CONFIG_X86_64
+#define MOV_CR4_DEPTH	64
+	void (*direct_write_cr4)(unsigned long val);
+	unsigned char *insn;
+	unsigned long cr4;
+	int i;
+
+	cr4 = native_read_cr4();
+
+	if ((cr4 & X86_CR4_SMEP) != X86_CR4_SMEP) {
+		pr_err("FAIL: SMEP not in use\n");
+		return;
+	}
+	cr4 &= ~(X86_CR4_SMEP);
+
+	pr_info("trying to clear SMEP normally\n");
+	native_write_cr4(cr4);
+	if (cr4 == native_read_cr4()) {
+		pr_err("FAIL: pinning SMEP failed!\n");
+		cr4 |= X86_CR4_SMEP;
+		pr_info("restoring SMEP\n");
+		native_write_cr4(cr4);
+		return;
+	}
+	pr_info("ok: SMEP did not get cleared\n");
+
+	/*
+	 * To test the post-write pinning verification we need to call
+	 * directly into the middle of native_write_cr4() where the
+	 * cr4 write happens, skipping the pinning. This searches for
+	 * the cr4 writing instruction.
+	 */
+	insn = (unsigned char *)native_write_cr4;
+	for (i = 0; i < MOV_CR4_DEPTH; i++) {
+		/* mov %rdi, %cr4 */
+		if (insn[i] == 0x0f && insn[i+1] == 0x22 && insn[i+2] == 0xe7)
+			break;
+	}
+	if (i >= MOV_CR4_DEPTH) {
+		pr_info("ok: cannot locate cr4 writing call gadget\n");
+		return;
+	}
+	direct_write_cr4 = (void *)(insn + i);
+
+	pr_info("trying to clear SMEP with call gadget\n");
+	direct_write_cr4(cr4);
+	if (native_read_cr4() & X86_CR4_SMEP) {
+		pr_info("ok: SMEP removal was reverted\n");
+	} else {
+		pr_err("FAIL: cleared SMEP not detected!\n");
+		cr4 |= X86_CR4_SMEP;
+		pr_info("restoring SMEP\n");
+		native_write_cr4(cr4);
+	}
+#else
+	pr_err("FAIL: this test is x86_64-only\n");
+#endif
+}
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 2837dc77478e..fd668776414b 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -132,6 +132,7 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(CORRUPT_LIST_ADD),
 	CRASHTYPE(CORRUPT_LIST_DEL),
 	CRASHTYPE(CORRUPT_USER_DS),
+	CRASHTYPE(UNSET_SMEP),
 	CRASHTYPE(CORRUPT_STACK),
 	CRASHTYPE(CORRUPT_STACK_STRONG),
 	CRASHTYPE(STACK_GUARD_PAGE_LEADING),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 3c6fd327e166..9c78d7e21c13 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -26,6 +26,7 @@ void lkdtm_CORRUPT_LIST_DEL(void);
 void lkdtm_CORRUPT_USER_DS(void);
 void lkdtm_STACK_GUARD_PAGE_LEADING(void);
 void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
+void lkdtm_UNSET_SMEP(void);
 
 /* lkdtm_heap.c */
 void lkdtm_OVERWRITE_ALLOCATION(void);
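
[ The one non-obvious piece of the lkdtm test is locating the raw cr4
  write (the 3-byte "mov %rdi,%cr4" encoding, 0f 22 e7) inside
  native_write_cr4() so the test can jump past the pinning logic. The
  byte scan itself is plain C; the sketch below runs the same loop over
  a made-up instruction buffer instead of live kernel text: ]

#include <stdio.h>
#include <stddef.h>

#define MOV_CR4_DEPTH	64

/*
 * Fake "function body": filler bytes followed by 0f 22 e7, the
 * encoding of "mov %rdi, %cr4". Purely illustrative data.
 */
static const unsigned char fake_text[MOV_CR4_DEPTH] = {
	0x55, 0x48, 0x89, 0xe5,		/* push %rbp; mov %rsp,%rbp */
	0x48, 0x09, 0xc7,		/* or %rax,%rdi */
	0x0f, 0x22, 0xe7,		/* mov %rdi,%cr4 <- target */
	0xc3,				/* ret */
};

/* same loop shape as lkdtm_UNSET_SMEP(): scan for the 3-byte opcode */
static ptrdiff_t find_mov_cr4(const unsigned char *insn)
{
	int i;

	for (i = 0; i < MOV_CR4_DEPTH - 2; i++) {
		if (insn[i] == 0x0f && insn[i + 1] == 0x22 &&
		    insn[i + 2] == 0xe7)
			return i;
	}
	return -1;
}

int main(void)
{
	ptrdiff_t off = find_mov_cr4(fake_text);

	if (off < 0)
		printf("no cr4-writing gadget found\n");
	else
		printf("gadget at offset %td\n", off);
	return 0;
}

[ In the real test, calling the located gadget must then be caught by
  the post-write re-check, which is exactly what the "ok: SMEP removal
  was reverted" case verifies. ]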