From: Nadav Har'El <n...@scylladb.com> Committer: Waldemar Kozaczuk <jwkozac...@gmail.com> Branch: master
pthread_create() - prefault top of stack This is the original patch from Jan 11, 2021 applied manually to resolve conflicts caused by subsequent changes to the relevant files since. Waldemar Kozaczuk ------------------------------------------------------------------ Since commit 695375f65303e13df1b9de798577ee9a4f8f9892, new threads start with preemption disabled, and then immediately (in thread_main_c()) enable preemption. However, this code runs on the thread's stack, which can be supplied by the user. If this stack is lazily allocated (not pre-populated, with pages to be populated only on first use), we will get a page fault when thread_main_c() begins to run with preemption disabled. This can happen if the application creates a thread stack with mmap() without passing the MAP_STACK option. Note that it is still strongly recommended to pre-populate thread stacks by using MAP_STACK (or MAP_POPULATE), because we still haven't committed a fix for issue #143. However, let's at least not crash immediately and consistently, as we did before this patch. This patch simply reads from the top byte of the user-supplied stack before starting the thread - causing it to be faulted in (if needed) at that time instead of when the thread starts. The first stack page should be enough to start thread_main_c() and get to the point where it re-enables preemption. This patch also includes a test reproducing this issue and demonstrating that it is fixed. Signed-off-by: Nadav Har'El <n...@scylladb.com> --- diff --git a/arch/aarch64/arch-switch.hh b/arch/aarch64/arch-switch.hh --- a/arch/aarch64/arch-switch.hh +++ b/arch/aarch64/arch-switch.hh @@ -55,6 +55,15 @@ void thread::init_stack() if (!stack.begin) { stack.begin = malloc(stack.size); stack.deleter = stack.default_deleter; + } else { + // The thread will run thread_main_c() with preemption disabled + // for a short while (see 695375f65303e13df1b9de798577ee9a4f8f9892) + // so page faults are forbidden - so we need the top of the stack + // to be pre-faulted. When we call malloc() above ourselves above + // we know this is the case, but if the user allocates the stack + // with mmap without MAP_STACK or MAP_POPULATE, this might not be + // the case, so we need to fault it in now, with preemption on. + (void) *((volatile char*)stack.begin + stack.size - 1); } void** stacktop = reinterpret_cast<void**>(stack.begin + stack.size); _state.fp = 0; diff --git a/arch/x64/arch-switch.hh b/arch/x64/arch-switch.hh --- a/arch/x64/arch-switch.hh +++ b/arch/x64/arch-switch.hh @@ -142,6 +142,15 @@ void thread::init_stack() if (!stack.begin) { stack.begin = malloc(stack.size); stack.deleter = stack.default_deleter; + } else { + // The thread will run thread_main_c() with preemption disabled + // for a short while (see 695375f65303e13df1b9de798577ee9a4f8f9892) + // so page faults are forbidden - so we need the top of the stack + // to be pre-faulted. When we call malloc() above ourselves above + // we know this is the case, but if the user allocates the stack + // with mmap without MAP_STACK or MAP_POPULATE, this might not be + // the case, so we need to fault it in now, with preemption on. + (void) *((volatile char*)stack.begin + stack.size - 1); } void** stacktop = reinterpret_cast<void**>(stack.begin + stack.size); _state.rbp = this; diff --git a/modules/tests/Makefile b/modules/tests/Makefile --- a/modules/tests/Makefile +++ b/modules/tests/Makefile @@ -134,7 +134,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so tst-bsd-evh.so \ tst-elf-init.so tst-realloc.so tst-setjmp.so \ libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \ tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so \ - tst-netlink.so misc-zfs-io.so misc-zfs-arc.so + tst-netlink.so misc-zfs-io.so misc-zfs-arc.so tst-pthread-create.so # libstatic-thread-variable.so tst-static-thread-variable.so \ ifeq ($(arch),x64) diff --git a/tests/tst-pthread-create.cc b/tests/tst-pthread-create.cc --- a/tests/tst-pthread-create.cc +++ b/tests/tst-pthread-create.cc @@ -0,0 +1,66 @@ +/* + * This work is open source software, licensed under the terms of the + * BSD license as described in the LICENSE file in the top-level directory. + */ + +#include <pthread.h> +#include <stdio.h> +#include <stdbool.h> +#include <unistd.h> +#include <atomic> +#include <cstdlib> +#include <sys/mman.h> + +static std::atomic<unsigned> tests_total(0), tests_failed(0); + +void report(const char* name, bool passed) +{ + static const char* status[] = { "FAIL", "PASS" }; + printf("%s: %s\n", status[passed], name); + tests_total += 1; + tests_failed += !passed; +} + +// Test that we can start a thread with an *unpopulated* stack. Because +// we start threads with preemption disabled (see commit +// 695375f65303e13df1b9de798577ee9a4f8f9892), there is the risk that +// the thread will start to run thread_main_c() (which finally enables +// preemption) and immediately need a page fault to populate the stack - +// which is forbidden with preemption disabled. +static bool thread_func_1_ran = false; +static void* thread_func_1(void *) +{ + thread_func_1_ran = true; + return nullptr; +} + +void test_pthread_create_unpopulated_stack() +{ + pthread_t thread; + pthread_attr_t attr; + + pthread_attr_init(&attr); + size_t stacksize = 65536; + // Note MAP_STACK or MAP_POPULATE deliberately missing, so the stack + // is not pre-populated. + void *stackaddr = mmap(nullptr, stacksize, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0); + report("mmap", stackaddr != MAP_FAILED); + pthread_attr_setstack(&attr, stackaddr, stacksize); + pthread_create(&thread, &attr, thread_func_1, nullptr); + pthread_join(thread, nullptr); + pthread_attr_destroy(&attr); + report("test_pthread_create_unpopulated_stack", thread_func_1_ran == true); +} + +int main(void) +{ + printf("starting pthread create test\n"); + + test_pthread_create_unpopulated_stack(); + + printf("SUMMARY: %u tests / %u failures\n", tests_total.load(), tests_failed.load()); + return tests_failed == 0 ? 0 : 1; +} -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/0000000000009c406e05e75490d1%40google.com.