The hard-coded addresses inside code_gen_alloc only make sense if we're building an executable that will actually run at the address we've put into the linker scripts.
When we're building with -fpie, the executable will run at some random location chosen by the kernel. We get better placement for the code_gen_buffer if we allow the kernel to place the memory, as it will tend to place it near the executable, based on the PROT_EXEC bit. Since code_gen_prologue is always inside the executable, this effect is easily seen at the end of most TB, with the exit_tb opcode: Before: 0x40b82024: mov $0x7fa97bd5c296,%r10 0x40b8202e: jmpq *%r10 After: 0x7f1191ff1024: jmpq 0x7f119edc0296 Signed-off-by: Richard Henderson <r...@twiddle.net> --- exec.c | 127 +++++++++++++++++++++++++++++++---------------------------------- 1 file changed, 60 insertions(+), 67 deletions(-) diff --git a/exec.c b/exec.c index bb6aa4a..0ddc07a 100644 --- a/exec.c +++ b/exec.c @@ -510,6 +510,14 @@ static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] __attribute__((aligned (CODE_GEN_ALIGN))); #endif +/* ??? Should configure for this not list operating systems here. */ +#if defined(__linux__) \ + || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \ + || defined(__DragonFly__) || defined(__OpenBSD__) \ + || defined(__NetBSD__) +# define USE_MMAP +#endif + static void code_gen_alloc(unsigned long tb_size) { #ifdef USE_STATIC_CODE_GEN_BUFFER @@ -517,6 +525,45 @@ static void code_gen_alloc(unsigned long tb_size) code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE; map_exec(code_gen_buffer, code_gen_buffer_size); #else +#ifdef USE_MMAP + int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#endif + uintptr_t max_buf = -1, start = 0; + + /* Constrain the size and position of the buffer based on the host cpu. */ +#if defined(__x86_64__) +# if !defined(__PIE__) && !defined(__PIC__) && defined(MAP_32BIT) + /* Force the memory down into low memory with the executable. + Leave the choice of exact location with the kernel. */ + flags |= MAP_32BIT; + /* Cannot expect to map more than 800MB in low memory. 
*/ + max_buf = 800 * 1024 * 1024; +# else + /* Maximum range of direct branches. */ + max_buf = 2ul * 1024 * 1024 * 1024; +# endif +#elif defined(__sparc__) && HOST_LONG_BITS == 64 + /* Maximum range of direct branches between TB (via "call"). */ + max_buf = 2ul * 1024 * 1024 * 1024; + start = 0x40000000ul; +#elif defined(__arm__) + /* Keep the buffer no bigger than 16MB to branch between blocks */ + max_buf = 16 * 1024 * 1024; +#elif defined(__s390x__) + /* Map the buffer so that we can use direct calls and branches. */ + /* We have a +- 4GB range on the branches; leave some slop. */ + max_buf = 3ul * 1024 * 1024 * 1024; + start = 0x90000000ul; +#endif +#if defined(__PIE__) || defined(__PIC__) + /* Don't bother setting a preferred location if we're building + a position-independent executable. We're more likely to get + an address near the main executable if we let the kernel + choose the address. */ + start = 0; +#endif + + /* Size the buffer. */ code_gen_buffer_size = tb_size; if (code_gen_buffer_size == 0) { #if defined(CONFIG_USER_ONLY) @@ -526,81 +573,27 @@ static void code_gen_alloc(unsigned long tb_size) code_gen_buffer_size = (unsigned long)(ram_size / 4); #endif } - if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) + if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) { code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE; - /* The code gen buffer location may have constraints depending on - the host cpu and OS */ -#if defined(__linux__) - { - int flags; - void *start = NULL; - - flags = MAP_PRIVATE | MAP_ANONYMOUS; -#if defined(__x86_64__) - flags |= MAP_32BIT; - /* Cannot map more than that */ - if (code_gen_buffer_size > (800 * 1024 * 1024)) - code_gen_buffer_size = (800 * 1024 * 1024); -#elif defined(__sparc__) && HOST_LONG_BITS == 64 - // Map the buffer below 2G, so we can use direct calls and branches - start = (void *) 0x40000000UL; - if (code_gen_buffer_size > (512 * 1024 * 1024)) - code_gen_buffer_size = (512 * 1024 * 1024); -#elif 
defined(__arm__) - /* Keep the buffer no bigger than 16MB to branch between blocks */ - if (code_gen_buffer_size > 16 * 1024 * 1024) - code_gen_buffer_size = 16 * 1024 * 1024; -#elif defined(__s390x__) - /* Map the buffer so that we can use direct calls and branches. */ - /* We have a +- 4GB range on the branches; leave some slop. */ - if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) { - code_gen_buffer_size = 3ul * 1024 * 1024 * 1024; - } - start = (void *)0x90000000UL; -#endif - code_gen_buffer = mmap(start, code_gen_buffer_size, - PROT_WRITE | PROT_READ | PROT_EXEC, - flags, -1, 0); - if (code_gen_buffer == MAP_FAILED) { - fprintf(stderr, "Could not allocate dynamic translator buffer\n"); - exit(1); - } } -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \ - || defined(__DragonFly__) || defined(__OpenBSD__) \ - || defined(__NetBSD__) - { - int flags; - void *addr = NULL; - flags = MAP_PRIVATE | MAP_ANONYMOUS; -#if defined(__x86_64__) - /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume - * 0x40000000 is free */ - flags |= MAP_FIXED; - addr = (void *)0x40000000; - /* Cannot map more than that */ - if (code_gen_buffer_size > (800 * 1024 * 1024)) - code_gen_buffer_size = (800 * 1024 * 1024); -#elif defined(__sparc__) && HOST_LONG_BITS == 64 - // Map the buffer below 2G, so we can use direct calls and branches - addr = (void *) 0x40000000UL; - if (code_gen_buffer_size > (512 * 1024 * 1024)) { - code_gen_buffer_size = (512 * 1024 * 1024); - } -#endif - code_gen_buffer = mmap(addr, code_gen_buffer_size, - PROT_WRITE | PROT_READ | PROT_EXEC, - flags, -1, 0); - if (code_gen_buffer == MAP_FAILED) { - fprintf(stderr, "Could not allocate dynamic translator buffer\n"); - exit(1); - } + if (code_gen_buffer_size > max_buf) { + code_gen_buffer_size = max_buf; + } + +#ifdef USE_MMAP + code_gen_buffer = mmap((void *)start, code_gen_buffer_size, + PROT_WRITE | PROT_READ | PROT_EXEC, + flags, -1, 0); + if (code_gen_buffer == MAP_FAILED) { + fprintf(stderr, 
"Could not allocate dynamic translator buffer\n"); + exit(1); } #else code_gen_buffer = g_malloc(code_gen_buffer_size); map_exec(code_gen_buffer, code_gen_buffer_size); #endif #endif /* !USE_STATIC_CODE_GEN_BUFFER */ + map_exec(code_gen_prologue, sizeof(code_gen_prologue)); code_gen_buffer_max_size = code_gen_buffer_size - (TCG_MAX_OP_SIZE * OPC_BUF_SIZE); -- 1.7.11.4