The hard-coded addresses inside code_gen_alloc only make sense if
we're building an executable that will actually run at the address
we've put into the linker scripts.

When we're building with -fpie, the executable will run at some
random location chosen by the kernel.  We get better placement for
the code_gen_buffer if we allow the kernel to place the memory,
as it will tend to place it near the executable, based on the
PROT_EXEC bit.

Since code_gen_prologue is always inside the executable, this effect
is easily seen at the end of most TBs, with the exit_tb opcode:

Before:
0x40b82024:  mov    $0x7fa97bd5c296,%r10
0x40b8202e:  jmpq   *%r10

After:
0x7f1191ff1024:  jmpq   0x7f119edc0296

Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 exec.c | 127 +++++++++++++++++++++++++++++++----------------------------------
 1 file changed, 60 insertions(+), 67 deletions(-)

diff --git a/exec.c b/exec.c
index bb6aa4a..0ddc07a 100644
--- a/exec.c
+++ b/exec.c
@@ -510,6 +510,14 @@ static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
                __attribute__((aligned (CODE_GEN_ALIGN)));
 #endif
 
+/* ??? Should configure for this not list operating systems here.  */
+#if defined(__linux__) \
+    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__)
+# define USE_MMAP
+#endif
+
 static void code_gen_alloc(unsigned long tb_size)
 {
 #ifdef USE_STATIC_CODE_GEN_BUFFER
@@ -517,6 +525,45 @@ static void code_gen_alloc(unsigned long tb_size)
     code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
     map_exec(code_gen_buffer, code_gen_buffer_size);
 #else
+#ifdef USE_MMAP
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#endif
+    uintptr_t max_buf = -1, start = 0;
+
+    /* Constrain the size and position of the buffer based on the host cpu.  */
+#if defined(__x86_64__)
+# if !defined(__PIE__) && !defined(__PIC__) && defined(MAP_32BIT)
+    /* Force the memory down into low memory with the executable.
+       Leave the choice of exact location with the kernel.  */
+    flags |= MAP_32BIT;
+    /* Cannot expect to map more than 800MB in low memory.  */
+    max_buf = 800 * 1024 * 1024;
+# else
+    /* Maximum range of direct branches.  */
+    max_buf = 2ul * 1024 * 1024 * 1024;
+# endif
+#elif defined(__sparc__) && HOST_LONG_BITS == 64
+    /* Maximum range of direct branches between TB (via "call").  */
+    max_buf = 2ul * 1024 * 1024 * 1024;
+    start = 0x40000000ul;
+#elif defined(__arm__)
+    /* Keep the buffer no bigger than 16MB to branch between blocks */
+    max_buf = 16 * 1024 * 1024;
+#elif defined(__s390x__)
+    /* Map the buffer so that we can use direct calls and branches.  */
+    /* We have a +- 4GB range on the branches; leave some slop.  */
+    max_buf = 3ul * 1024 * 1024 * 1024;
+    start = 0x90000000ul;
+#endif
+#if defined(__PIE__) || defined(__PIC__)
+    /* Don't bother setting a preferred location if we're building
+       a position-independent executable.  We're more likely to get
+       an address near the main executable if we let the kernel
+       choose the address.  */
+    start = 0;
+#endif
+
+    /* Size the buffer.  */
     code_gen_buffer_size = tb_size;
     if (code_gen_buffer_size == 0) {
 #if defined(CONFIG_USER_ONLY)
@@ -526,81 +573,27 @@ static void code_gen_alloc(unsigned long tb_size)
         code_gen_buffer_size = (unsigned long)(ram_size / 4);
 #endif
     }
-    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
+    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) {
         code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
-    /* The code gen buffer location may have constraints depending on
-       the host cpu and OS */
-#if defined(__linux__) 
-    {
-        int flags;
-        void *start = NULL;
-
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        flags |= MAP_32BIT;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        start = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024))
-            code_gen_buffer_size = (512 * 1024 * 1024);
-#elif defined(__arm__)
-        /* Keep the buffer no bigger than 16MB to branch between blocks */
-        if (code_gen_buffer_size > 16 * 1024 * 1024)
-            code_gen_buffer_size = 16 * 1024 * 1024;
-#elif defined(__s390x__)
-        /* Map the buffer so that we can use direct calls and branches.  */
-        /* We have a +- 4GB range on the branches; leave some slop.  */
-        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
-            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
-        }
-        start = (void *)0x90000000UL;
-#endif
-        code_gen_buffer = mmap(start, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC,
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
     }
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__)
-    {
-        int flags;
-        void *addr = NULL;
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
-         * 0x40000000 is free */
-        flags |= MAP_FIXED;
-        addr = (void *)0x40000000;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        addr = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
-            code_gen_buffer_size = (512 * 1024 * 1024);
-        }
-#endif
-        code_gen_buffer = mmap(addr, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC, 
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
+    if (code_gen_buffer_size > max_buf) {
+        code_gen_buffer_size = max_buf;
+    }
+
+#ifdef USE_MMAP
+    code_gen_buffer = mmap((void *)start, code_gen_buffer_size,
+                           PROT_WRITE | PROT_READ | PROT_EXEC,
+                           flags, -1, 0);
+    if (code_gen_buffer == MAP_FAILED) {
+        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+        exit(1);
     }
 #else
     code_gen_buffer = g_malloc(code_gen_buffer_size);
     map_exec(code_gen_buffer, code_gen_buffer_size);
 #endif
 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
+
     map_exec(code_gen_prologue, sizeof(code_gen_prologue));
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
-- 
1.7.11.4


Reply via email to