================
@@ -0,0 +1,424 @@
+//===-- lib/runtime/trampoline.cpp -------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// W^X-compliant trampoline pool implementation.
+//
+// This file implements a runtime trampoline pool that maintains separate
+// memory regions for executable code (RX) and writable data (RW).
+//
+// On Linux the code region transitions RW → RX (never simultaneously W+X).
+// On macOS Apple Silicon the code region uses MAP_JIT with per-thread W^X
+// toggling via pthread_jit_write_protect_np, so the mapping permissions
+// include both W and X but hardware enforces that only one is active at
+// a time on any given thread.
+//
+// Architecture:
+//   - Code region (RX): Contains pre-assembled trampoline stubs that load
+//     callee address and static chain from a paired TDATA entry, then jump
+//     to the callee with the static chain in the appropriate register.
+//   - Data region (RW): Contains TrampolineData entries with {callee_address,
+//     static_chain_address} pairs, one per trampoline slot.
+//   - Free list: Tracks available trampoline slots for O(1) alloc/free.
+//
+// Thread safety: Uses Fortran::runtime::Lock (pthreads on POSIX,
+// CRITICAL_SECTION on Windows) — not std::mutex — to avoid C++ runtime
+// library dependence. A single global lock serializes pool operations.
+// This is a deliberate V1 design choice to keep the initial W^X
+// architectural change minimal. Per-thread lock-free pools are deferred
+// to a future optimization patch.
+//
+// AddressSanitizer note: The trampoline code region is allocated via
+// mmap (not malloc/new), so ASan does not track it. The data region
+// and handles are allocated via malloc (through AllocateMemoryOrCrash),
+// which ASan intercepts normally. No special annotations are needed.
+//
+// See flang/docs/InternalProcedureTrampolines.md for design details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/trampoline.h"
+#include "flang-rt/runtime/lock.h"
+#include "flang-rt/runtime/memory.h"
+#include "flang-rt/runtime/terminator.h"
+#include "flang-rt/runtime/trampoline.h"
+
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <new> // For placement-new only (no operator new/delete dependency)
+
+// Platform-specific headers for memory mapping.
+#if defined(_WIN32)
+#include <windows.h>
+#else
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+
+// macOS Apple Silicon requires MAP_JIT and pthread_jit_write_protect_np
+// to create executable memory under the hardened runtime.
+#if defined(__APPLE__) && defined(__aarch64__)
+#include <libkern/OSCacheControl.h>
+#include <pthread.h>
+#endif
+
+// Architecture support check. Stub generators exist only for x86-64 and
+// AArch64. On other architectures the file compiles but the runtime API
+// functions crash with a diagnostic if actually called, so that building
+// flang-rt on e.g. RISC-V or PPC64 never fails.
+#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || \
+    defined(_M_ARM64)
+#define TRAMPOLINE_ARCH_SUPPORTED 1
+#else
+#define TRAMPOLINE_ARCH_SUPPORTED 0
+#endif
+
+namespace Fortran::runtime::trampoline {
+
+/// A handle returned to the caller by TrampolinePool::allocate and
+/// released by TrampolinePool::free. Contains enough info to find
+/// both the trampoline stub and its data entry, plus the slot index
+/// for free-list bookkeeping. Allocated via AllocateMemoryOrCrash
+/// (malloc), so ASan tracks it normally.
+struct TrampolineHandle {
+  void *codePtr; // Entry point of the trampoline stub in the RX code region.
+  TrampolineData *dataPtr; // Paired data entry in the RW region.
+  std::size_t slotIndex; // Slot index used to return the entry to the free list.
+};
+
+// Namespace-scope globals following Flang runtime conventions:
+// - Lock is trivially constructible (pthread_mutex_t / CRITICAL_SECTION)
+// - Pool pointer starts null; initialized under lock (double-checked locking)
+class TrampolinePool; // Forward declaration for pointer below.
+static Lock poolLock;
+static TrampolinePool *poolInstance{nullptr};
+
+/// The global trampoline pool.
+class TrampolinePool {
+public:
+  /// Return the global pool, creating it on first use.
+  ///
+  /// NOTE(review): the previous version used double-checked locking on a
+  /// plain (non-atomic) pointer; the unsynchronized fast-path read of
+  /// poolInstance is a data race and undefined behavior under the C++
+  /// memory model. Until poolInstance becomes a std::atomic with
+  /// acquire/release ordering, the safe fix is to always take the lock.
+  /// Pool operations are serialized on a global lock anyway (V1 design),
+  /// so this adds no new contention.
+  static TrampolinePool &instance() {
+    CriticalSection critical{poolLock};
+    if (!poolInstance) {
+      // Allocate with malloc + placement new (trivial constructor) to
+      // avoid a dependence on global operator new.
+      Terminator terminator{__FILE__, __LINE__};
+      void *storage =
+          AllocateMemoryOrCrash(terminator, sizeof(TrampolinePool));
+      poolInstance = new (storage) TrampolinePool();
+    }
+    return *poolInstance;
+  }
+
+  /// Reserve one trampoline slot, bind it to {callee, static chain},
+  /// and return a heap-allocated handle describing the slot. Crashes
+  /// (does not return null) when the fixed-size pool is exhausted.
+  TrampolineHandle *allocate(
+      const void *calleeAddress, const void *staticChainAddress) {
+    CriticalSection critical{lock_};
+    ensureInitialized();
+
+    if (freeHead_ == kInvalidIndex) {
+      // No free slots left. The pool is fixed-size by design for V1;
+      // capacity comes from FLANG_TRAMPOLINE_POOL_SIZE (default 1024).
+      // Dynamic slab growth is deferred to a follow-up patch if real
+      // workloads demonstrate a need for it.
+      Terminator terminator{__FILE__, __LINE__};
+      terminator.Crash("Trampoline pool exhausted (max %zu slots). "
+                       "Set FLANG_TRAMPOLINE_POOL_SIZE to increase.",
+          poolSize_);
+    }
+
+    // Pop the head of the free list.
+    std::size_t slot{freeHead_};
+    freeHead_ = freeList_[slot];
+
+    // Publish the callee/static-chain pair in the RW data region; the
+    // paired stub in the RX code region reads this entry at call time.
+    TrampolineData &entry{dataRegion_[slot]};
+    entry.calleeAddress = calleeAddress;
+    entry.staticChainAddress = staticChainAddress;
+
+    // Build the caller-visible handle with malloc + placement new,
+    // matching this file's no-operator-new convention.
+    Terminator terminator{__FILE__, __LINE__};
+    void *handleStorage =
+        AllocateMemoryOrCrash(terminator, sizeof(TrampolineHandle));
+    auto *handle = new (handleStorage) TrampolineHandle();
+    handle->dataPtr = &entry;
+    handle->slotIndex = slot;
+    handle->codePtr =
+        static_cast<char *>(codeRegion_) + slot * kTrampolineStubSize;
+
+    return handle;
+  }
+
+  /// Return the executable entry point of `handle`'s trampoline stub.
+  void *getCallableAddress(TrampolineHandle *handle) {
+    return handle->codePtr;
+  }
+
+  /// Return a trampoline slot to the pool and release its handle.
+  ///
+  /// NOTE(review): the previous poison value was ~0 - 1 (0xFFFF...FFFE),
+  /// contradicting the comment that promised a 0xDEAD... pattern; the
+  /// code now matches the documented intent.
+  void free(TrampolineHandle *handle) {
+    CriticalSection critical{lock_};
+
+    std::size_t index = handle->slotIndex;
+
+    // Poison the data entry so that any dangling call through a freed
+    // trampoline traps immediately. Use a non-null, obviously-invalid
+    // 0xDEAD... address (truncated on 32-bit targets) so the resulting
+    // fault is distinguishable from a null-pointer dereference when
+    // debugging.
+    dataRegion_[index].calleeAddress = reinterpret_cast<const void *>(
+        static_cast<uintptr_t>(0xDEADDEADDEADDEADull));
+    dataRegion_[index].staticChainAddress = nullptr;
+
+    // Push the slot back onto the LIFO free list.
+    freeList_[index] = freeHead_;
+    freeHead_ = index;
+
+    // The handle itself was malloc'd in allocate(); release it here.
+    FreeMemory(handle);
+  }
+
+private:
+  static constexpr std::size_t kInvalidIndex = ~std::size_t{0};
+
+  TrampolinePool() = default;
+
+  void ensureInitialized() {
+    if (initialized_)
+      return;
+    initialized_ = true;
+
+    // Check environment variable for pool size override.
+    // Fixed-size pool by design (V1): avoids complexity of dynamic growth
+    // and re-protection of code pages. The default (1024 slots) is
+    // sufficient for typical Fortran programs. Users can override via:
+    //   export FLANG_TRAMPOLINE_POOL_SIZE=4096
+    poolSize_ = kDefaultPoolSize;
+    if (const char *envSize = std::getenv("FLANG_TRAMPOLINE_POOL_SIZE")) {
----------------
eugeneepshteyn wrote:

This should be documented in 
https://github.com/llvm/llvm-project/blob/main/flang/docs/RuntimeEnvironment.md

https://github.com/llvm/llvm-project/pull/183108
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to