From: Nadav Har'El <n...@scylladb.com>
Committer: Waldemar Kozaczuk <jwkozac...@gmail.com>
Branch: master

pthread_create() - prefault top of stack

This is the original patch from Jan 11, 2021 applied manually
to resolve conflicts caused by subsequent changes to the relevant
files since.
Waldemar Kozaczuk
------------------------------------------------------------------
Since commit 695375f65303e13df1b9de798577ee9a4f8f9892, new threads start
with preemption disabled, and then immediately (in thread_main_c()) enable
preemption. However, this code runs on the thread's stack, which can be
supplied by the user. If this stack is lazily allocated (not pre-populated,
with pages to be populated only on first use), we will get a page fault
when thread_main_c() begins to run with preemption disabled.

This can happen if the application creates a thread stack with mmap()
without passing the MAP_STACK option.

Note that it is still strongly recommended to pre-populate thread
stacks by using MAP_STACK (or MAP_POPULATE), because we still haven't
committed a fix for issue #143. However, let's at least not crash
immediately and consistently, as we did before this patch.

This patch simply reads from the top byte of the user-supplied stack
before starting the thread - causing it to be faulted in (if needed)
at that time instead of when the thread starts. The first stack page
should be enough to start thread_main_c() and get to the point where
it re-enables preemption.

This patch also includes a test reproducing this issue and demonstrating
that it is fixed.

Signed-off-by: Nadav Har'El <n...@scylladb.com>

---
diff --git a/arch/aarch64/arch-switch.hh b/arch/aarch64/arch-switch.hh
--- a/arch/aarch64/arch-switch.hh
+++ b/arch/aarch64/arch-switch.hh
@@ -55,6 +55,15 @@ void thread::init_stack()
     if (!stack.begin) {
         stack.begin = malloc(stack.size);
         stack.deleter = stack.default_deleter;
+    } else {
+        // The thread will run thread_main_c() with preemption disabled
+        // for a short while (see 695375f65303e13df1b9de798577ee9a4f8f9892)
+        // so page faults are forbidden - so we need the top of the stack
+        // to be pre-faulted. When we call malloc() above ourselves above
+        // we know this is the case, but if the user allocates the stack
+        // with mmap without MAP_STACK or MAP_POPULATE, this might not be
+        // the case, so we need to fault it in now, with preemption on.
+        (void) *((volatile char*)stack.begin + stack.size - 1);
     }
     void** stacktop = reinterpret_cast<void**>(stack.begin + stack.size);
     _state.fp = 0;
diff --git a/arch/x64/arch-switch.hh b/arch/x64/arch-switch.hh
--- a/arch/x64/arch-switch.hh
+++ b/arch/x64/arch-switch.hh
@@ -142,6 +142,15 @@ void thread::init_stack()
     if (!stack.begin) {
         stack.begin = malloc(stack.size);
         stack.deleter = stack.default_deleter;
+    } else {
+        // The thread will run thread_main_c() with preemption disabled
+        // for a short while (see 695375f65303e13df1b9de798577ee9a4f8f9892)
+        // so page faults are forbidden - so we need the top of the stack
+        // to be pre-faulted. When we call malloc() above ourselves above
+        // we know this is the case, but if the user allocates the stack
+        // with mmap without MAP_STACK or MAP_POPULATE, this might not be
+        // the case, so we need to fault it in now, with preemption on.
+        (void) *((volatile char*)stack.begin + stack.size - 1);
     }
     void** stacktop = reinterpret_cast<void**>(stack.begin + stack.size);
     _state.rbp = this;
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -134,7 +134,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so 
tst-bsd-evh.so \
        tst-elf-init.so tst-realloc.so tst-setjmp.so \
        libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \
        tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so \
-       tst-netlink.so misc-zfs-io.so misc-zfs-arc.so
+       tst-netlink.so misc-zfs-io.so misc-zfs-arc.so tst-pthread-create.so
 #      libstatic-thread-variable.so tst-static-thread-variable.so \
 
 ifeq ($(arch),x64)
diff --git a/tests/tst-pthread-create.cc b/tests/tst-pthread-create.cc
--- a/tests/tst-pthread-create.cc
+++ b/tests/tst-pthread-create.cc
@@ -0,0 +1,66 @@
+/*
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <atomic>
+#include <cstdlib>
+#include <sys/mman.h>
+
+static std::atomic<unsigned> tests_total(0), tests_failed(0);
+
+void report(const char* name, bool passed)
+{
+    static const char* status[] = { "FAIL", "PASS" };
+    printf("%s: %s\n", status[passed], name);
+    tests_total += 1;
+    tests_failed += !passed;
+}
+
+// Test that we can start a thread with an *unpopulated* stack. Because
+// we start threads with preemption disabled (see commit
+// 695375f65303e13df1b9de798577ee9a4f8f9892), there is the risk that
+// the thread will start to run thread_main_c() (which finally enables
+// preemption) and immediately need a page fault to populate the stack -
+// which is forbidden with preemption disabled.
+static bool thread_func_1_ran = false;
+static void* thread_func_1(void *)
+{
+    thread_func_1_ran = true;
+    return nullptr;
+}
+
+void test_pthread_create_unpopulated_stack()
+{
+    pthread_t thread;
+    pthread_attr_t attr;
+
+    pthread_attr_init(&attr);
+    size_t stacksize = 65536;
+    // Note MAP_STACK or MAP_POPULATE deliberately missing, so the stack
+    // is not pre-populated.
+    void *stackaddr = mmap(nullptr, stacksize,  
+        PROT_READ | PROT_WRITE | PROT_EXEC, 
+        MAP_ANONYMOUS | MAP_PRIVATE,
+        -1, 0);
+    report("mmap", stackaddr != MAP_FAILED);
+    pthread_attr_setstack(&attr, stackaddr, stacksize);
+    pthread_create(&thread, &attr, thread_func_1, nullptr);
+    pthread_join(thread, nullptr);
+    pthread_attr_destroy(&attr);
+    report("test_pthread_create_unpopulated_stack", thread_func_1_ran == true);
+}
+
+int main(void)
+{
+    printf("starting pthread create test\n");
+
+    test_pthread_create_unpopulated_stack();
+
+    printf("SUMMARY: %u tests / %u failures\n", tests_total.load(), 
tests_failed.load());
+    return tests_failed == 0 ? 0 : 1;
+}

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/0000000000009c406e05e75490d1%40google.com.

Reply via email to