This patch enhances the OSv dynamic loader to support
PIEs and position-dependent executables that use TLS
(Thread Local Storage) in local-exec mode.
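
For illustration, local-exec is the access model where the static linker
bakes a fixed negative offset from the thread pointer directly into the
code; a minimal sketch (hypothetical variable, built with g++ -fpie -pie):

    #include <cstdio>

    // Forcing the model keeps the example explicit; for a variable
    // defined in the executable itself the compiler typically picks
    // local-exec on its own.
    __thread int counter __attribute__((tls_model("local-exec"))) = 7;

    int main()
    {
        // Access compiles to something like:
        //   mov %fs:0xfffffffffffffffc,%eax
        counter++;
        printf("counter=%d\n", counter);
        return 0;
    }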

It does so by reserving an extra slot in the kernel static
TLS block at its end and designating it as the user static TLS
for the executable ELF. Any dependent ELF objects are still
placed in the area before the kernel TLS. For the specifics,
please read the comments added to arch-elf.cc and arch-switch.hh.
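
To sketch the resulting layout and offset arithmetic relative to the
thread pointer TP (the FS segment base, which points at the TCB;
sizes illustrative, details in the comments mentioned above):

    [SO_2][SO_1][ kernel TLS | reserved exe slot ][TCB]   <-- TP = &TCB
    // executable variable (local-exec):
    //    address = TP - exe_tls_size + st_value + addend
    // shared-object variable (initial-exec):
    //    address = TP - kernel_tls_size - static_tls_end(SO_i) + st_value + addend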

Please note that this solution limits the size of the application
ELF TLS block to 64 bytes, plus an extra gap resulting from the
64-byte alignment of the kernel TLS. This should be sufficient for
most applications, which use a tiny TLS block (Go's is 8 bytes long)
if they use one at all. Rust ELFs tend to rely on a rather large TLS,
in which case the limit in loader.ld needs to be increased
accordingly and loader.elf relinked.
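
For example, to raise the reserve to 256 bytes one would bump the constant
in the .tbss rule added by this patch in arch/x64/loader.ld (a sketch; the
value must cover the application's PT_TLS segment size):

    _pie_static_tls_start = .;
    /* This is a reserve intended for executables' (pie or non-pie) TLS block */
    . = . + 256;
    . = ALIGN(64);
    _pie_static_tls_end = .;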

Fixes #352

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 arch/x64/arch-elf.cc    |  65 +++++++-
 arch/x64/arch-elf.hh    |   2 +-
 arch/x64/arch-switch.hh |  68 +++++++--
 arch/x64/loader.ld      |   4 +
 core/elf.cc             |  26 +++-
 include/osv/elf.hh      |   7 +-
 modules/tests/Makefile  |  14 +-
 tests/libtls.cc         |   7 +
 tests/tst-tls.cc        |  12 +-
 9 files changed, 173 insertions(+), 32 deletions(-)

diff --git a/arch/x64/arch-elf.cc b/arch/x64/arch-elf.cc
index 82c60eb1..ffd98fe1 100644
--- a/arch/x64/arch-elf.cc
+++ b/arch/x64/arch-elf.cc
@@ -12,6 +12,8 @@
 
 namespace elf {
 
+// This function is solely used to relocate symbols in the OSv kernel ELF
+// itself and is indirectly called by the loader's premain() function
 bool arch_init_reloc_dyn(struct init_table *t, u32 type, u32 sym,
                          void *addr, void *base, Elf64_Sxword addend)
 {
@@ -33,7 +35,7 @@ bool arch_init_reloc_dyn(struct init_table *t, u32 type, u32 sym,
     case R_X86_64_GLOB_DAT:
         *static_cast<u64*>(addr) = t->dyn_tabs.lookup(sym)->st_value;
         break;
-    case R_X86_64_DPTMOD64:
+    case R_X86_64_DTPMOD64:
         abort();
         //*static_cast<u64*>(addr) = symbol_module(sym);
         break;
@@ -53,6 +55,10 @@ bool arch_init_reloc_dyn(struct init_table *t, u32 type, u32 sym,
     return true;
 }
 
+//
+// This method is used when relocating symbols in all ELF objects
+// except the OSv kernel ELF itself, which is relocated by
+// the function arch_init_reloc_dyn() above
 bool object::arch_relocate_rela(u32 type, u32 sym, void *addr,
                                 Elf64_Sxword addend)
 {
@@ -74,23 +80,58 @@ bool object::arch_relocate_rela(u32 type, u32 sym, void *addr,
     case R_X86_64_GLOB_DAT:
         *static_cast<void**>(addr) = symbol(sym).relocated_addr();
         break;
-    case R_X86_64_DPTMOD64:
+    // The next 3 types are intended to relocate symbols of thread-local variables
+    // defined with the __thread modifier
+    //
+    // Please note that thread-local variables accessed in the so-called local-exec mode
+    // are never relocated: their negative offsets relative to the TCB address held in
+    // the FS register are placed by the static linker into the final code, as in this example:
+    //    mov %fs:0xfffffffffffffffc,%eax
+    //
+    case R_X86_64_DTPMOD64:
+        // This type and the next one, R_X86_64_DTPOFF64, prepare the execution
+        // of __tls_get_addr(), which provides dynamic access to a thread-local variable.
+        // This one resolves to the index of the module (ELF object) containing the variable
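+        //
+        // For reference, these two relocations fill in the module/offset pair that
+        // __tls_get_addr() consumes (the standard x86-64 TLS ABI shape, shown here
+        // only for illustration):
+        //    struct tls_index { u64 ti_module; u64 ti_offset; };
+        //    extern "C" void* __tls_get_addr(tls_index* ti);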
         if (sym == STN_UNDEF) {
+            // The thread-local variable being accessed is within
+            // the SAME shared object as the caller
             *static_cast<u64*>(addr) = _module_index;
+            // No need to calculate the offset relative to the beginning of the module
         } else {
-            *static_cast<u64*>(addr) = symbol(sym).obj->_module_index;
+            // The thread-local variable being accessed is located
+            // in a DIFFERENT shared object than the caller
+            *static_cast<u64*>(addr) = symbol(sym).obj->module_index();
         }
         break;
     case R_X86_64_DTPOFF64:
+        // The thread-local variable being accessed is located
+        // in a DIFFERENT shared object than the caller
         *static_cast<u64*>(addr) = symbol(sym).symbol->st_value;
         break;
     case R_X86_64_TPOFF64:
+        // This type is intended to resolve symbols of thread-local variables in the
+        // static TLS accessed in initial-exec mode; it is handled by calculating the
+        // offset of the target thread-local variable relative to the TCB address
+        // held in the FS register
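+        //
+        // For illustration, initial-exec access typically compiles to code like
+        // the following, where the GOT slot patched by this relocation holds the
+        // offset computed below:
+        //    movq  ex2@gottpoff(%rip),%rax
+        //    movl  %fs:(%rax),%eax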
         if (sym) {
             auto sm = symbol(sym);
-            sm.obj->alloc_static_tls();
-            auto tls_offset = sm.obj->static_tls_end() + sched::kernel_tls_size();
+            ulong tls_offset;
+            if (sm.obj->is_executable()) {
+                tls_offset = sm.obj->get_tls_size();
+                // If this is an executable (pie or position-dependent one),
+                // the variable is located in the slot reserved at the end of the
+                // TLS area, right where the kernel TLS block ends,
+                // so the offset is simply the size of this ELF's TLS block
+            } else {
+                // If this is a shared library, the variable is located in one of the
+                // TLS blocks that make up the static TLS area before the kernel part,
+                // so the offset needs to be shifted by the kernel TLS size plus the
+                // size of the user static TLS allocated so far
+                sm.obj->alloc_static_tls();
+                tls_offset = sm.obj->static_tls_end() + sched::kernel_tls_size();
+            }
             *static_cast<u64*>(addr) = sm.symbol->st_value + addend - tls_offset;
         } else {
+            // A symbol index of 0 refers to a TLS variable local to this object; the addend carries its offset
             alloc_static_tls();
             auto tls_offset = static_tls_end() + sched::kernel_tls_size();
             *static_cast<u64*>(addr) = addend - tls_offset;
@@ -126,7 +167,19 @@ void object::prepare_initial_tls(void* buffer, size_t size,
     memset(ptr + _tls_init_size, 0, _tls_uninit_size);
 
     offsets.resize(std::max(_module_index + 1, offsets.size()));
-    offsets[_module_index] = - _static_tls_offset - tls_size - sched::kernel_tls_size();
+    auto offset = - _static_tls_offset - tls_size - sched::kernel_tls_size();
+    offsets[_module_index] = offset;
+}
+
+void object::prepare_local_tls(std::vector<ptrdiff_t>& offsets)
+{
+    if (!_static_tls && !is_executable()) {
+        return;
+    }
+
+    offsets.resize(std::max(_module_index + 1, offsets.size()));
+    auto offset = - get_tls_size();
+    offsets[_module_index] = offset;
 }
 
 }
diff --git a/arch/x64/arch-elf.hh b/arch/x64/arch-elf.hh
index 2c347777..1811ceb5 100644
--- a/arch/x64/arch-elf.hh
+++ b/arch/x64/arch-elf.hh
@@ -18,7 +18,7 @@ enum {
     R_X86_64_PC16 = 13, //  word16 S + A - P
     R_X86_64_8 = 14, //  word8 S + A
     R_X86_64_PC8 = 15, //  word8 S + A - P
-    R_X86_64_DPTMOD64 = 16, //  word64
+    R_X86_64_DTPMOD64 = 16, //  word64
     R_X86_64_DTPOFF64 = 17, //  word64
     R_X86_64_TPOFF64 = 18, //  word64
     R_X86_64_TLSGD = 19, //  word32
diff --git a/arch/x64/arch-switch.hh b/arch/x64/arch-switch.hh
index 88bef949..cd1acb83 100644
--- a/arch/x64/arch-switch.hh
+++ b/arch/x64/arch-switch.hh
@@ -151,33 +151,83 @@ void thread::init_stack()
 }
 
 void thread::setup_tcb()
-{
-    assert(tls.size);
+{   //
+    // Most importantly this method allocates the TLS memory region and
+    // sets up the TCB (Thread Control Block) that points to that allocated
+    // memory region. The TLS memory region belongs to a specific thread
+    // and holds the thread-local variables (those with the __thread modifier)
+    // defined in the OSv kernel and in the application ELF objects, including
+    // the dependent ones pulled in through the DT_NEEDED tag.
+    //
+    // Each ELF object and the OSv kernel gets its own TLS block, with offsets
+    // recorded in the DTV structure (the offsets get calculated as each ELF is
+    // loaded and its symbols resolved, before we get to this point).
+    //
+    // Because both the OSv kernel and a position-independent (pie) or
+    // position-dependent executable (i.e. a non-library) are compiled to use
+    // local-exec mode to access their thread-local variables, we need to set up
+    // the offsets and TLS blocks in a special way to avoid any collisions.
+    // Specifically we define the OSv TLS segment
+    // (see arch/x64/loader.ld for specifics) with an extra buffer at
+    // the end of the kernel TLS to accommodate the TLS block of pies and
+    // position-dependent executables.
+
+    // (1) - TLS memory area layout with app shared library
+    // |-----|-----|-----|--------------|------|
+    // |SO_3 |SO_2 |SO_1 |KERNEL        |<NONE>|
+    // |-----|-----|-----|--------------|------|
+
+    // (2) - TLS memory area layout with pie or
+    // position-dependent executable
+    //       |-----|-----|--------------|------|
+    //       |SO_3 |SO_2 |KERNEL        | EXE  |
+    //       |-----|-----|--------------|------|
+
+    assert(sched::tls.size);
 
     void* user_tls_data;
     size_t user_tls_size = 0;
+    void* executable_tls_data = nullptr;
+    size_t executable_tls_size = 0;
     if (_app_runtime) {
         auto obj = _app_runtime->app.lib();
         assert(obj);
         user_tls_size = obj->initial_tls_size();
         user_tls_data = obj->initial_tls();
+        if (obj->is_executable()) {
+            executable_tls_size = obj->get_tls_size();
+            executable_tls_data = obj->get_tls_segment();
+        }
     }
 
     // In arch/x64/loader.ld, the TLS template segment is aligned to 64
     // bytes, and that's what the objects placed in it assume. So make
     // sure our copy is allocated with the same 64-byte alignment, and
     // verify that object::init_static_tls() ensured that user_tls_size
-    // also doesn't break this alignment .
-    assert(align_check(tls.size, (size_t)64));
+    // also doesn't break this alignment.
+    auto kernel_tls_size = sched::tls.size;
+    assert(align_check(kernel_tls_size, (size_t)64));
     assert(align_check(user_tls_size, (size_t)64));
-    void* p = aligned_alloc(64, sched::tls.size + user_tls_size + sizeof(*_tcb));
+
+    auto total_tls_size = kernel_tls_size + user_tls_size;
+    void* p = aligned_alloc(64, total_tls_size + sizeof(*_tcb));
+    // First goes user TLS data
     if (user_tls_size) {
         memcpy(p, user_tls_data, user_tls_size);
     }
-    memcpy(p + user_tls_size, sched::tls.start, sched::tls.filesize);
-    memset(p + user_tls_size + sched::tls.filesize, 0,
-           sched::tls.size - sched::tls.filesize);
-    _tcb = static_cast<thread_control_block*>(p + tls.size + user_tls_size);
+    // Next goes kernel TLS data
+    auto kernel_tls_offset = user_tls_size;
+    memcpy(p + kernel_tls_offset, sched::tls.start, sched::tls.filesize);
+    memset(p + kernel_tls_offset + sched::tls.filesize, 0,
+           kernel_tls_size - sched::tls.filesize);
+
+    if (executable_tls_size) {
+        // If this is an executable, copy its TLS block data to the designated
+        // offset at the end of the area, as described in the ASCII art (2)
+        // above for the executables' TLS layout
+        auto executable_tls_offset = total_tls_size - executable_tls_size;
+        memcpy(p + executable_tls_offset, executable_tls_data, executable_tls_size);
+    }
+    _tcb = static_cast<thread_control_block*>(p + total_tls_size);
     _tcb->self = _tcb;
     _tcb->tls_base = p + user_tls_size;
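+    // Note: per the x86-64 TLS ABI, %fs:0 must hold the TCB self-pointer
+    // (set just above); once the scheduler points the FS base at _tcb,
+    // local-exec accesses such as mov %fs:-N land in the blocks laid out above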
 
diff --git a/arch/x64/loader.ld b/arch/x64/loader.ld
index 8b82b1bb..6af67ef4 100644
--- a/arch/x64/loader.ld
+++ b/arch/x64/loader.ld
@@ -83,7 +83,11 @@ SECTIONS
     .tdata : AT(ADDR(.tdata) - OSV_KERNEL_VM_SHIFT) { *(.tdata .tdata.* .gnu.linkonce.td.*) } :tls :text
     .tbss : AT(ADDR(.tbss) - OSV_KERNEL_VM_SHIFT) {
         *(.tbss .tbss.* .gnu.linkonce.tb.*)
+        _pie_static_tls_start = .;
+        /* This is a reserve intended for executables' (pie or non-pie) TLS block */
+        . = . + 64;
         . = ALIGN(64);
+        _pie_static_tls_end = .;
     } :tls :text
     .tls_template_size = SIZEOF(.tdata) + SIZEOF(.tbss);
     .bss : AT(ADDR(.bss) - OSV_KERNEL_VM_SHIFT) { *(.bss .bss.*) } :text
diff --git a/core/elf.cc b/core/elf.cc
index 42fe07d9..196f3405 100644
--- a/core/elf.cc
+++ b/core/elf.cc
@@ -403,6 +403,7 @@ Elf64_Note::Elf64_Note(void *_base, char *str)
     }
 }
 
+extern "C" char _pie_static_tls_start, _pie_static_tls_end;
 void object::load_segments()
 {
     for (unsigned i = 0; i < _ehdr.e_phnum; ++i) {
@@ -470,9 +471,13 @@ void object::load_segments()
     // As explained in issue #352, we currently don't correctly support TLS
     // used in PIEs.
     if (_is_executable && _tls_segment) {
-        std::cout << "WARNING: " << pathname() << " is a PIE using TLS. This "
-                  << "is currently unsupported (see issue #352). Link with "
-                  << "'-shared' instead of '-pie'.\n";
+        auto tls_size = _tls_init_size + _tls_uninit_size;
+        ulong pie_static_tls_maximum_size = &_pie_static_tls_end - &_pie_static_tls_start;
+        if (tls_size > pie_static_tls_maximum_size) {
+            std::cout << "WARNING: " << pathname() << " is a PIE using TLS of size " << tls_size
+                  << " which is greater than the " << pie_static_tls_maximum_size << "-byte limit. "
+                  << "Either increase the limit or link with '-shared' instead of '-pie'.\n";
+        }
     }
 }
 
@@ -1089,9 +1094,9 @@ void object::init_static_tls()
         }
         static_tls |= obj->_static_tls;
         _initial_tls_size = std::max(_initial_tls_size, obj->static_tls_end());
-       // Align initial_tls_size to 64 bytes, to not break the 64-byte
-       // alignment of the TLS segment defined in loader.ld.
-       _initial_tls_size = align_up(_initial_tls_size, (size_t)64);
+        // Align initial_tls_size to 64 bytes, to not break the 64-byte
+        // alignment of the TLS segment defined in loader.ld.
+        _initial_tls_size = align_up(_initial_tls_size, (size_t)64);
     }
     if (!static_tls) {
         _initial_tls_size = 0;
@@ -1103,8 +1108,13 @@ void object::init_static_tls()
         if (obj->is_core()) {
             continue;
         }
-        obj->prepare_initial_tls(_initial_tls.get(), _initial_tls_size,
-                                 _initial_tls_offsets);
+        if (obj->is_executable()) {
+            obj->prepare_local_tls(_initial_tls_offsets);
+        } else {
+            obj->prepare_initial_tls(_initial_tls.get(), _initial_tls_size,
+                                     _initial_tls_offsets);
+        }
     }
 }
 
diff --git a/include/osv/elf.hh b/include/osv/elf.hh
index 775d8c8d..cf79a8a1 100644
--- a/include/osv/elf.hh
+++ b/include/osv/elf.hh
@@ -365,8 +365,11 @@ public:
     void init_static_tls();
     size_t initial_tls_size() { return _initial_tls_size; }
     void* initial_tls() { return _initial_tls.get(); }
+    void* get_tls_segment() { return _tls_segment; }
     bool is_non_pie_executable() { return _ehdr.e_type == ET_EXEC; }
+    std::vector<ptrdiff_t>& initial_tls_offsets() { return _initial_tls_offsets; }
+    bool is_executable() { return _is_executable; }
+    ulong get_tls_size();
 protected:
     virtual void load_segment(const Elf64_Phdr& segment) = 0;
     virtual void unload_segment(const Elf64_Phdr& segment) = 0;
@@ -391,9 +394,9 @@ private:
     void relocate_rela();
     void relocate_pltgot();
     unsigned symtab_len();
-    ulong get_tls_size();
     void collect_dependencies(std::unordered_set<elf::object*>& ds);
     void prepare_initial_tls(void* buffer, size_t size, std::vector<ptrdiff_t>& offsets);
+    void prepare_local_tls(std::vector<ptrdiff_t>& offsets);
     void alloc_static_tls();
     void make_text_writable(bool flag);
 protected:
@@ -440,7 +443,7 @@ protected:
                             Elf64_Sxword addend);
     bool arch_relocate_jump_slot(u32 sym, void *addr, Elf64_Sxword addend, bool ignore_missing = false);
     size_t static_tls_end() {
-        if (is_core()) {
+        if (is_core() || is_executable()) {
             return 0;
         }
         return _static_tls_offset + get_tls_size();
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
index d8ea6049..3c5bc324 100644
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -119,7 +119,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so tst-bsd-evh.so \
        tst-sendfile.so misc-lock-perf.so tst-uio.so tst-printf.so \
        tst-pthread-affinity.so tst-pthread-tsd.so tst-thread-local.so \
        tst-zfs-mount.so tst-regex.so tst-tcp-siocoutq.so \
-       libtls.so tst-tls.so tst-select-timeout.so tst-faccessat.so \
+       libtls.so tst-tls.so tst-tls-pie.so tst-select-timeout.so tst-faccessat.so \
        tst-fstatat.so misc-reboot.so tst-fcntl.so payload-namespace.so \
        tst-namespace.so tst-without-namespace.so payload-env.so \
        payload-merge-env.so misc-execve.so misc-execve-payload.so misc-mutex2.so \
@@ -150,7 +150,17 @@ $(out)/tests/tst-tls.so: \
                $(src)/tests/tst-tls.cc \
                $(out)/tests/libtls.so
        $(makedir)
-       $(call quiet, cd $(out); $(CXX) $(CXXFLAGS) -shared -o $@ $< tests/libtls.so, CXX tst-tls.so)
+       $(call quiet, cd $(out); $(CXX) $(CXXFLAGS) -D__SHARED_OBJECT__=1 -shared -o $@ $< tests/libtls.so, CXX tst-tls.so)
+
+$(out)/tests/tst-tls-pie.o: CXXFLAGS:=$(subst -fPIC,-fpie,$(CXXFLAGS))
+$(out)/tests/tst-tls-pie.o: $(src)/tests/tst-tls.cc
+       $(makedir)
+       $(call quiet, $(CXX) $(CXXFLAGS) -c -o $@ $<, CXX $*.cc)
+$(out)/tests/tst-tls-pie.so: \
+               $(out)/tests/tst-tls-pie.o \
+               $(out)/tests/libtls.so
+       $(makedir)
+       $(call quiet, cd $(out); $(CXX) $(CXXFLAGS) -fuse-ld=bfd -pthread -pie -o $@ $< tests/libtls.so, LD tst-tls-pie.so)
 
 boost-tests := tst-vfs.so tst-libc-locking.so misc-fs-stress.so \
        misc-bdev-write.so misc-bdev-wlatency.so misc-bdev-rw.so \
diff --git a/tests/libtls.cc b/tests/libtls.cc
index bf7e7e70..96e380e5 100644
--- a/tests/libtls.cc
+++ b/tests/libtls.cc
@@ -12,9 +12,16 @@ __thread int ex1 = 321;
 __thread int ex2 __attribute__ ((tls_model ("initial-exec"))) = 432;
 __thread int ex3 = 765;
 
+extern __thread int v1;
+extern __thread int v5;
+
 void external_library()
 {
+    // ex1 and ex3 get accessed via __tls_get_addr()
     ex1++;
     ex2++;
     ex3++;
+    // These 2 below get handled by the __tls_get_addr() function in core/elf.cc
+    v1++;
+    v5++;
 }
diff --git a/tests/tst-tls.cc b/tests/tst-tls.cc
index f827aa71..452052c2 100644
--- a/tests/tst-tls.cc
+++ b/tests/tst-tls.cc
@@ -37,7 +37,7 @@ static __thread int v6 __attribute__ ((tls_model ("initial-exec"))) = 678;
 
 extern __thread int ex3 __attribute__ ((tls_model ("initial-exec")));
 
-#ifndef __OSV__
+#ifndef __SHARED_OBJECT__
 // We can also try to force the "Local Exec" TLS model, but OSv's makefile
 // builds all tests as shared objects (.so), and the linker will report an
 // error, because local-exec is not allowed in shared libraries, just in
@@ -65,7 +65,7 @@ int main(int argc, char** argv)
     report(v5 == 567, "v5");
     report(v6 == 678, "v6");
     report(ex3 == 765, "ex3");
-#ifndef __OSV__
+#ifndef __SHARED_OBJECT__
     report(v7 == 789, "v7");
 #endif
 
@@ -73,6 +73,8 @@ int main(int argc, char** argv)
     report(ex1 == 322, "ex1 modified");
     report(ex2 == 433, "ex2 modified");
     report(ex3 == 766, "ex3 modified");
+    report(v1 == 124, "v1 modified");
+    report(v5 == 568, "v5 modified");
 
     // Write on this thread's variables, and see a new thread gets
     // the original default values
@@ -82,7 +84,7 @@ int main(int argc, char** argv)
     v4 = 0;
     v5 = 0;
     v6 = 0;
-#ifndef __OSV__
+#ifndef __SHARED_OBJECT__
     v7 = 0;
 #endif
 
@@ -97,7 +99,7 @@ int main(int argc, char** argv)
             report(v5 == 567, "v5 in new thread");
             report(v6 == 678, "v6 in new thread");
             report(ex3 == 765, "ex3 in new thread");
-#ifndef __OSV__
+#ifndef __SHARED_OBJECT__
             report(v7 == 789, "v7 in new thread");
 #endif
 
@@ -105,6 +107,8 @@ int main(int argc, char** argv)
             report(ex1 == 322, "ex1 modified in new thread");
             report(ex2 == 433, "ex2 modified in new thread");
             report(ex3 == 766, "ex3 modified in new thread");
+            report(v1 == 124, "v1 modified in new thread");
+            report(v5 == 568, "v5 modified in new thread");
     });
     t1.join();
 
-- 
2.20.1
