================ @@ -0,0 +1,424 @@ +//===-- lib/runtime/trampoline.cpp -------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// W^X-compliant trampoline pool implementation. +// +// This file implements a runtime trampoline pool that maintains separate +// memory regions for executable code (RX) and writable data (RW). +// +// On Linux the code region transitions RW → RX (never simultaneously W+X). +// On macOS Apple Silicon the code region uses MAP_JIT with per-thread W^X +// toggling via pthread_jit_write_protect_np, so the mapping permissions +// include both W and X but hardware enforces that only one is active at +// a time on any given thread. +// +// Architecture: +// - Code region (RX): Contains pre-assembled trampoline stubs that load +// callee address and static chain from a paired TDATA entry, then jump +// to the callee with the static chain in the appropriate register. +// - Data region (RW): Contains TrampolineData entries with {callee_address, +// static_chain_address} pairs, one per trampoline slot. +// - Free list: Tracks available trampoline slots for O(1) alloc/free. +// +// Thread safety: Uses Fortran::runtime::Lock (pthreads on POSIX, +// CRITICAL_SECTION on Windows) — not std::mutex — to avoid C++ runtime +// library dependence. A single global lock serializes pool operations. +// This is a deliberate V1 design choice to keep the initial W^X +// architectural change minimal. Per-thread lock-free pools are deferred +// to a future optimization patch. +// +// AddressSanitizer note: The trampoline code region is allocated via +// mmap (not malloc/new), so ASan does not track it. 
The data region +// and handles are allocated via malloc (through AllocateMemoryOrCrash), +// which ASan intercepts normally. No special annotations are needed. +// +// See flang/docs/InternalProcedureTrampolines.md for design details. +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/trampoline.h" +#include "flang-rt/runtime/lock.h" +#include "flang-rt/runtime/memory.h" +#include "flang-rt/runtime/terminator.h" +#include "flang-rt/runtime/trampoline.h" + +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <new> // For placement-new only (no operator new/delete dependency) + +// Platform-specific headers for memory mapping. +#if defined(_WIN32) +#include <windows.h> +#else +#include <sys/mman.h> +#include <unistd.h> +#endif + +// macOS Apple Silicon requires MAP_JIT and pthread_jit_write_protect_np +// to create executable memory under the hardened runtime. +#if defined(__APPLE__) && defined(__aarch64__) +#include <libkern/OSCacheControl.h> +#include <pthread.h> +#endif + +// Architecture support check. Stub generators exist only for x86-64 and +// AArch64. On other architectures the file compiles but the runtime API +// functions crash with a diagnostic if actually called, so that building +// flang-rt on e.g. RISC-V or PPC64 never fails. +#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || \ + defined(_M_ARM64) +#define TRAMPOLINE_ARCH_SUPPORTED 1 +#else +#define TRAMPOLINE_ARCH_SUPPORTED 0 +#endif + +namespace Fortran::runtime::trampoline { + +/// A handle returned to the caller. Contains enough info to find +/// both the trampoline stub and its data entry. +struct TrampolineHandle { + void *codePtr; // Pointer to the trampoline stub in the RX region. + TrampolineData *dataPtr; // Pointer to the data entry in the RW region. + std::size_t slotIndex; // Index in the pool for free-list management. 
+}; + +// Namespace-scope globals following Flang runtime conventions: +// - Lock is trivially constructible (pthread_mutex_t / CRITICAL_SECTION) +// - Pool pointer starts null; initialized under lock (double-checked locking) +class TrampolinePool; // Forward declaration for pointer below. +static Lock poolLock; +static TrampolinePool *poolInstance{nullptr}; + +/// The global trampoline pool. +class TrampolinePool { +public: + static TrampolinePool &instance() { + if (poolInstance) { + return *poolInstance; + } + CriticalSection critical{poolLock}; + if (poolInstance) { + return *poolInstance; + } + // Allocate pool using malloc + placement new (trivial constructor). + Terminator terminator{__FILE__, __LINE__}; + void *storage = AllocateMemoryOrCrash(terminator, sizeof(TrampolinePool)); + poolInstance = new (storage) TrampolinePool(); + return *poolInstance; + } + + /// Allocate a trampoline slot and initialize it. + TrampolineHandle *allocate( + const void *calleeAddress, const void *staticChainAddress) { + CriticalSection critical{lock_}; + ensureInitialized(); + + if (freeHead_ == kInvalidIndex) { + // Pool exhausted — fixed size by design for V1. + // The pool capacity is controlled by FLANG_TRAMPOLINE_POOL_SIZE + // (default 1024). Dynamic slab growth can be added in a follow-up + // patch if real workloads demonstrate a need for it. + Terminator terminator{__FILE__, __LINE__}; + terminator.Crash("Trampoline pool exhausted (max %zu slots). " + "Set FLANG_TRAMPOLINE_POOL_SIZE to increase.", + poolSize_); + } + + std::size_t index = freeHead_; + freeHead_ = freeList_[index]; + + // Initialize the data entry. + dataRegion_[index].calleeAddress = calleeAddress; + dataRegion_[index].staticChainAddress = staticChainAddress; + + // Create handle using malloc + placement new. 
+ Terminator terminator{__FILE__, __LINE__}; + void *mem = AllocateMemoryOrCrash(terminator, sizeof(TrampolineHandle)); + auto *handle = new (mem) TrampolineHandle(); + handle->codePtr = + static_cast<char *>(codeRegion_) + index * kTrampolineStubSize; + handle->dataPtr = &dataRegion_[index]; + handle->slotIndex = index; + + return handle; + } + + /// Get the callable address of a trampoline. + void *getCallableAddress(TrampolineHandle *handle) { return handle->codePtr; } + + /// Free a trampoline slot. + void free(TrampolineHandle *handle) { + CriticalSection critical{lock_}; + + std::size_t index = handle->slotIndex; + + // Poison the data entry so that any dangling call through a freed + // trampoline traps immediately. We use a non-null, obviously-invalid + // address (0xDEAD...) so that the resulting fault is distinguishable + // from a null-pointer dereference when debugging. + dataRegion_[index].calleeAddress = reinterpret_cast<const void *>( + static_cast<uintptr_t>(~uintptr_t{0} - 1)); + dataRegion_[index].staticChainAddress = nullptr; + + // Return slot to free list. + freeList_[index] = freeHead_; + freeHead_ = index; + + FreeMemory(handle); + } + +private: + static constexpr std::size_t kInvalidIndex = ~std::size_t{0}; + + TrampolinePool() = default; + + void ensureInitialized() { + if (initialized_) + return; + initialized_ = true; + + // Check environment variable for pool size override. + // Fixed-size pool by design (V1): avoids complexity of dynamic growth + // and re-protection of code pages. The default (1024 slots) is + // sufficient for typical Fortran programs. Users can override via: + // export FLANG_TRAMPOLINE_POOL_SIZE=4096 + poolSize_ = kDefaultPoolSize; + if (const char *envSize = std::getenv("FLANG_TRAMPOLINE_POOL_SIZE")) { ---------------- eugeneepshteyn wrote:
The `FLANG_TRAMPOLINE_POOL_SIZE` environment variable should be documented in https://github.com/llvm/llvm-project/blob/main/flang/docs/RuntimeEnvironment.md

https://github.com/llvm/llvm-project/pull/183108
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
