Module Name: src Committed By: kamil Date: Sat May 26 20:27:48 UTC 2018
Modified Files: src/tests/lib/libc/sys: t_ptrace_amd64_wait.h t_ptrace_x86_wait.h Log Message: Port the CVE 2018-8897 mitigation to i386 ATF ptrace(2) tests On i386 there is no need to switch execution mode. Use 0x23 SS selector for i386, amd64 used 0x4f. Based on pointers from <maxv>. Sponsored by <The NetBSD Foundation> To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/tests/lib/libc/sys/t_ptrace_amd64_wait.h cvs rdiff -u -r1.6 -r1.7 src/tests/lib/libc/sys/t_ptrace_x86_wait.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/tests/lib/libc/sys/t_ptrace_amd64_wait.h diff -u src/tests/lib/libc/sys/t_ptrace_amd64_wait.h:1.4 src/tests/lib/libc/sys/t_ptrace_amd64_wait.h:1.5 --- src/tests/lib/libc/sys/t_ptrace_amd64_wait.h:1.4 Mon May 14 12:42:34 2018 +++ src/tests/lib/libc/sys/t_ptrace_amd64_wait.h Sat May 26 20:27:48 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: t_ptrace_amd64_wait.h,v 1.4 2018/05/14 12:42:34 kamil Exp $ */ +/* $NetBSD: t_ptrace_amd64_wait.h,v 1.5 2018/05/26 20:27:48 kamil Exp $ */ /*- * Copyright (c) 2016 The NetBSD Foundation, Inc. @@ -113,219 +113,9 @@ ATF_TC_BODY(x86_64_regs1, tc) /// ---------------------------------------------------------------------------- -ATF_TC(x86_64_cve_2018_8897); -ATF_TC_HEAD(x86_64_cve_2018_8897, tc) -{ - atf_tc_set_md_var(tc, "descr", - "Verify mitigation for CVE-2018-8897 (POP SS debug exception)"); -} - -#define CVE_2018_8897_PAGE 0x5000 /* page addressable by 32-bit registers */ - -static void -trigger_cve_2018_8897(void) -{ - /* - * A function to trigger the POP SS (CVE-2018-8897) vulnerability - * - * We need to switch to 32-bit mode execution on 64-bit kernel. - * This is achieved with far jump instruction and GDT descriptor - * set to 32-bit CS selector. The 32-bit CS selector is kernel - * specific, in the NetBSD case registered as GUCODE32_SEL - * that is equal to (14 (decimal) << 3) with GDT and user - * privilege level (this makes it 0x73). - * - * In UNIX as(1) assembly x86_64 far jump is coded as ljmp. - * amd64 ljmp requires an indirect address with cs:RIP. - * - * When we are running in 32-bit mode, it's similar to the - * mode as if the binary had been launched in netbsd32. - * - * There are two versions of this exploit, one with RIP - * relative code and the other with static addresses. - * The first one is PIE code aware, the other no-PIE one. - * - * - * After switching to the 32-bit mode we can move on to the remaining - * part of the exploit. - * - * - * Set the stack pointer to the page we allocated earlier. Remember - * that we put an SS selector exactly at this address, so we can pop. - * - * movl $0x5000,%esp - * - * Pop the SS selector off the stack. This reloads the SS selector, - * which is fine. Remember that we set DR0 at address 0x5000, which - * we are now reading. Therefore, on this instruction, the CPU will - * raise a #DB exception. - * - * But the "pop %ss" instruction is special: it blocks exceptions - * until the next instruction is executed. So the #DB that we just - * raised is actually blocked. - * - * pop %ss - * - * We are still here, and didn't receive the #DB. After we execute - * this instruction, the effect of "pop %ss" will disappear, and - * we will receive the #DB for real. - * - * int $4 - * - * Here the bug happens. We executed "int $4", so we entered the - * kernel, with interrupts disabled. The #DB that was pending is - * received. But, it is received immediately in kernel mode, and is - * _NOT_ received when interrupts are enabled again. - * - * It means that, in the first instruction of the $4 handler, we - * think we are safe with interrupts disabled. But we aren't, and - * just got interrupted. - * - * The new interrupt handler doesn't handle this particular context: - * we are entered in kernel mode, the previous context was kernel - * mode too but it still had the user context loaded. - * - * We find ourselves not doing a 'swapgs'. At the end of the day, it - * means that we call trap() with a curcpu() that is fully - * controllable by userland. From then on, it is easy to escalate - * privileges. - * - * With SVS it also means we don't switch CR3, so this results in a - * triple fault, which this time cannot be turned to a privilege - * escalation. - */ - -#if __PIE__ - void *csRIP; - - csRIP = malloc(sizeof(int) + sizeof(short)); - FORKEE_ASSERT(csRIP != NULL); - - __asm__ __volatile__( - " leal 24(%%eip), %%eax\n\t" - " movq %0, %%rdx\n\t" - " movl %%eax, (%%rdx)\n\t" - " movw $0x73, 4(%%rdx)\n\t" - " movq %1, %%rax\n\t" - " ljmp *(%%rax)\n\t" - " .code32\n\t" - " movl $0x5000, %%esp\n\t" - " pop %%ss\n\t" - " int $4\n\t" - " .code64\n\t" - : "=m"(csRIP) - : "m"(csRIP) - : "%rax", "%rdx", "%rsp" - ); -#else /* !__PIE__ */ - __asm__ __volatile__( - " movq $farjmp32, %%rax\n\t" - " ljmp *(%%rax)\n\t" - "farjmp32:\n\t" - " .long trigger32\n\t" - " .word 0x73\n\t" - " .code32\n\t" - "trigger32:\n\t" - " movl $0x5000, %%esp\n\t" - " pop %%ss\n\t" - " int $4\n\t" - " .code64\n\t" - : - : - : "%rax", "%rsp" - ); -#endif -} - -ATF_TC_BODY(x86_64_cve_2018_8897, tc) -{ - const int sigval = SIGSTOP; - pid_t child, wpid; -#if defined(TWAIT_HAVE_STATUS) - int status; -#endif - char *trap_page; - struct dbreg db; - - if (!can_we_set_dbregs()) { - atf_tc_skip("Either run this test as root or set sysctl(3) " - "security.models.extensions.user_set_dbregs to 1"); - } - - DPRINTF("Before forking process PID=%d\n", getpid()); - SYSCALL_REQUIRE((child = fork()) != -1); - if (child == 0) { - DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid()); - FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1); - - trap_page = mmap((void *)CVE_2018_8897_PAGE, - sysconf(_SC_PAGESIZE), PROT_READ|PROT_WRITE, - MAP_FIXED|MAP_ANON|MAP_PRIVATE, -1, 0); - - /* trigger page fault */ - memset(trap_page, 0, sysconf(_SC_PAGESIZE)); - - /* SS selector (descriptor 9 (0x4f >> 3)) */ - *trap_page = 0x4f; - - DPRINTF("Before raising %s from child\n", strsignal(sigval)); - FORKEE_ASSERT(raise(sigval) == 0); - - trigger_cve_2018_8897(); - - /* NOTREACHED */ - FORKEE_ASSERTX(0 && "This shall not be reached"); - } - DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child); - - DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); - TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child); - - validate_status_stopped(status, sigval); - - DPRINTF("Call GETDBREGS for the child process\n"); - SYSCALL_REQUIRE(ptrace(PT_GETDBREGS, child, &db, 0) != -1); - - /* - * Set up the dbregs. We put the 0x5000 address in DR0. - * It means that, the first time we touch this, the CPU will trigger a - * #DB exception. - */ - db.dr[0] = CVE_2018_8897_PAGE; - db.dr[7] = 0x30003; - - DPRINTF("Call SETDBREGS for the child process\n"); - SYSCALL_REQUIRE(ptrace(PT_SETDBREGS, child, &db, 0) != -1); - - DPRINTF("Before resuming the child process where it left off and " - "without signal to be sent\n"); - SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1); - - DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); - TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child); - - // In this test we receive SIGFPE, is this appropriate? -// validate_status_stopped(status, SIGFPE); - - DPRINTF("Kill the child process\n"); - SYSCALL_REQUIRE(ptrace(PT_KILL, child, NULL, 0) != -1); - - DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); - TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child); - - validate_status_signaled(status, SIGKILL, 0); - - DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); - TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0)); -} - -#undef CVE_2018_8897_PAGE - -/// ---------------------------------------------------------------------------- #define ATF_TP_ADD_TCS_PTRACE_WAIT_AMD64() \ - ATF_TP_ADD_TC_HAVE_GPREGS(tp, x86_64_regs1); \ - ATF_TP_ADD_TC_HAVE_GPREGS(tp, x86_64_cve_2018_8897); + ATF_TP_ADD_TC_HAVE_GPREGS(tp, x86_64_regs1); #else #define ATF_TP_ADD_TCS_PTRACE_WAIT_AMD64() #endif Index: src/tests/lib/libc/sys/t_ptrace_x86_wait.h diff -u src/tests/lib/libc/sys/t_ptrace_x86_wait.h:1.6 src/tests/lib/libc/sys/t_ptrace_x86_wait.h:1.7 --- src/tests/lib/libc/sys/t_ptrace_x86_wait.h:1.6 Sun May 13 23:01:25 2018 +++ src/tests/lib/libc/sys/t_ptrace_x86_wait.h Sat May 26 20:27:48 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: t_ptrace_x86_wait.h,v 1.6 2018/05/13 23:01:25 kamil Exp $ */ +/* $NetBSD: t_ptrace_x86_wait.h,v 1.7 2018/05/26 20:27:48 kamil Exp $ */ /*- * Copyright (c) 2016 The NetBSD Foundation, Inc. @@ -1956,6 +1956,238 @@ ATF_TC_BODY(dbregs_dr3_dont_inherit_exec { dbregs_dont_inherit_execve(3); } + +/// ---------------------------------------------------------------------------- + +ATF_TC(x86_cve_2018_8897); +ATF_TC_HEAD(x86_cve_2018_8897, tc) +{ + atf_tc_set_md_var(tc, "descr", + "Verify mitigation for CVE-2018-8897 (POP SS debug exception)"); +} + +#define X86_CVE_2018_8897_PAGE 0x5000 /* page addressable by 32-bit registers */ + +static void +x86_cve_2018_8897_trigger(void) +{ + /* + * A function to trigger the POP SS (CVE-2018-8897) vulnerability + * + * ifdef __x86_64__ + * + * We need to switch to 32-bit mode execution on 64-bit kernel. + * This is achieved with far jump instruction and GDT descriptor + * set to 32-bit CS selector. The 32-bit CS selector is kernel + * specific, in the NetBSD case registered as GUCODE32_SEL + * that is equal to (14 (decimal) << 3) with GDT and user + * privilege level (this makes it 0x73). + * + * In UNIX as(1) assembly x86_64 far jump is coded as ljmp. + * amd64 ljmp requires an indirect address with cs:RIP. + * + * When we are running in 32-bit mode, it's similar to the + * mode as if the binary had been launched in netbsd32. + * + * There are two versions of this exploit, one with RIP + * relative code and the other with static addresses. + * The first one is PIE code aware, the other no-PIE one. + * + * + * After switching to the 32-bit mode we can move on to the remaining + * part of the exploit. + * + * endif // __x86_64__ + * + * Set the stack pointer to the page we allocated earlier. Remember + * that we put an SS selector exactly at this address, so we can pop. + * + * movl $0x5000,%esp + * + * Pop the SS selector off the stack. This reloads the SS selector, + * which is fine. Remember that we set DR0 at address 0x5000, which + * we are now reading. Therefore, on this instruction, the CPU will + * raise a #DB exception. + * + * But the "pop %ss" instruction is special: it blocks exceptions + * until the next instruction is executed. So the #DB that we just + * raised is actually blocked. + * + * pop %ss + * + * We are still here, and didn't receive the #DB. After we execute + * this instruction, the effect of "pop %ss" will disappear, and + * we will receive the #DB for real. + * + * int $4 + * + * Here the bug happens. We executed "int $4", so we entered the + * kernel, with interrupts disabled. The #DB that was pending is + * received. But, it is received immediately in kernel mode, and is + * _NOT_ received when interrupts are enabled again. + * + * It means that, in the first instruction of the $4 handler, we + * think we are safe with interrupts disabled. But we aren't, and + * just got interrupted. + * + * The new interrupt handler doesn't handle this particular context: + * we are entered in kernel mode, the previous context was kernel + * mode too but it still had the user context loaded. + * + * We find ourselves not doing a 'swapgs'. At the end of the day, it + * means that we call trap() with a curcpu() that is fully + * controllable by userland. From then on, it is easy to escalate + * privileges. + * + * With SVS it also means we don't switch CR3, so this results in a + * triple fault, which this time cannot be turned to a privilege + * escalation. + */ + +#if __x86_64__ +#if __PIE__ + void *csRIP; + + csRIP = malloc(sizeof(int) + sizeof(short)); + FORKEE_ASSERT(csRIP != NULL); + + __asm__ __volatile__( + " leal 24(%%eip), %%eax\n\t" + " movq %0, %%rdx\n\t" + " movl %%eax, (%%rdx)\n\t" + " movw $0x73, 4(%%rdx)\n\t" + " movq %1, %%rax\n\t" + " ljmp *(%%rax)\n\t" + " .code32\n\t" + " movl $0x5000, %%esp\n\t" + " pop %%ss\n\t" + " int $4\n\t" + " .code64\n\t" + : "=m"(csRIP) + : "m"(csRIP) + : "%rax", "%rdx", "%rsp" + ); +#else /* !__PIE__ */ + __asm__ __volatile__( + " movq $farjmp32, %%rax\n\t" + " ljmp *(%%rax)\n\t" + "farjmp32:\n\t" + " .long trigger32\n\t" + " .word 0x73\n\t" + " .code32\n\t" + "trigger32:\n\t" + " movl $0x5000, %%esp\n\t" + " pop %%ss\n\t" + " int $4\n\t" + " .code64\n\t" + : + : + : "%rax", "%rsp" + ); +#endif +#elif __i386__ + __asm__ __volatile__( + "movl $0x5000, %%esp\n\t" + "pop %%ss\n\t" + "int $4\n\t" + : + : + : "%esp" + ); +#endif +} + +ATF_TC_BODY(x86_cve_2018_8897, tc) +{ + const int sigval = SIGSTOP; + pid_t child, wpid; +#if defined(TWAIT_HAVE_STATUS) + int status; +#endif + char *trap_page; + struct dbreg db; + + + if (!can_we_set_dbregs()) { + atf_tc_skip("Either run this test as root or set sysctl(3) " + "security.models.extensions.user_set_dbregs to 1"); + } + + DPRINTF("Before forking process PID=%d\n", getpid()); + SYSCALL_REQUIRE((child = fork()) != -1); + if (child == 0) { + DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid()); + FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1); + + trap_page = mmap((void *)X86_CVE_2018_8897_PAGE, + sysconf(_SC_PAGESIZE), PROT_READ|PROT_WRITE, + MAP_FIXED|MAP_ANON|MAP_PRIVATE, -1, 0); + + /* trigger page fault */ + memset(trap_page, 0, sysconf(_SC_PAGESIZE)); + + // kernel GDT +#if __x86_64__ + /* SS selector (descriptor 9 (0x4f >> 3)) */ + *trap_page = 0x4f; +#elif __i386__ + /* SS selector (descriptor 4 (0x23 >> 3)) */ + *trap_page = 0x23; +#endif + + DPRINTF("Before raising %s from child\n", strsignal(sigval)); + FORKEE_ASSERT(raise(sigval) == 0); + + x86_cve_2018_8897_trigger(); + + /* NOTREACHED */ + FORKEE_ASSERTX(0 && "This shall not be reached"); + } + DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child); + + DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); + TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child); + + validate_status_stopped(status, sigval); + + DPRINTF("Call GETDBREGS for the child process\n"); + SYSCALL_REQUIRE(ptrace(PT_GETDBREGS, child, &db, 0) != -1); + + /* + * Set up the dbregs. We put the 0x5000 address in DR0. + * It means that, the first time we touch this, the CPU will trigger a + * #DB exception. + */ + db.dr[0] = X86_CVE_2018_8897_PAGE; + db.dr[7] = 0x30003; + + DPRINTF("Call SETDBREGS for the child process\n"); + SYSCALL_REQUIRE(ptrace(PT_SETDBREGS, child, &db, 0) != -1); + + DPRINTF("Before resuming the child process where it left off and " + "without signal to be sent\n"); + SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1); + + DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); + TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child); + + // In this test we receive SIGFPE, is this appropriate? +// validate_status_stopped(status, SIGFPE); + + DPRINTF("Kill the child process\n"); + SYSCALL_REQUIRE(ptrace(PT_KILL, child, NULL, 0) != -1); + + DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); + TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child); + + validate_status_signaled(status, SIGKILL, 0); + + DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); + TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0)); +} + +/// ---------------------------------------------------------------------------- + #define ATF_TP_ADD_TCS_PTRACE_WAIT_X86() \ ATF_TP_ADD_TC_HAVE_DBREGS(tp, dbregs_print); \ ATF_TP_ADD_TC_HAVE_DBREGS(tp, dbregs_preserve_dr0); \ @@ -2017,7 +2249,8 @@ ATF_TC_BODY(dbregs_dr3_dont_inherit_exec ATF_TP_ADD_TC_HAVE_DBREGS(tp, dbregs_dr0_dont_inherit_execve); \ ATF_TP_ADD_TC_HAVE_DBREGS(tp, dbregs_dr1_dont_inherit_execve); \ ATF_TP_ADD_TC_HAVE_DBREGS(tp, dbregs_dr2_dont_inherit_execve); \ - ATF_TP_ADD_TC_HAVE_DBREGS(tp, dbregs_dr3_dont_inherit_execve); + ATF_TP_ADD_TC_HAVE_DBREGS(tp, dbregs_dr3_dont_inherit_execve); \ + ATF_TP_ADD_TC_HAVE_DBREGS(tp, x86_cve_2018_8897); #else #define ATF_TP_ADD_TCS_PTRACE_WAIT_X86() #endif