We had a hack for arm and sparc, allocating code_gen_prologue to a
special section.  Honestly, that does no good in some configurations:
the section sits near the executable's code segment, but a dynamically
allocated code_gen_buffer can land anywhere in the address space, out
of direct-branch range of the prologue.  We've already got limits on
code_gen_buffer_size to ensure that all TBs can use direct branches
between themselves; reuse this limit to ensure the prologue is also
reachable.
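To make the constraint concrete: on ARM a B/BL instruction encodes a
signed 24-bit word offset, i.e. roughly +/-32MB of direct-branch range.
Below is a minimal standalone sketch (illustrative names and addresses,
not QEMU code) of the reachability property that capping the buffer
size and carving the prologue from its end buys us:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* ARM's B/BL encodes a signed 24-bit word offset, so a direct
       branch reaches roughly +/-32MB from the branch site.  */
    #define ARM_MAX_BRANCH_RANGE  (32 * 1024 * 1024)

    /* True if a direct branch at 'from' can reach 'to'.  */
    static bool in_direct_branch_range(uintptr_t from, uintptr_t to)
    {
        intptr_t disp = (intptr_t)(to - from);
        return disp >= -ARM_MAX_BRANCH_RANGE
            && disp < ARM_MAX_BRANCH_RANGE;
    }

    int main(void)
    {
        /* Hypothetical 16MB buffer with the prologue stolen from its
           end: every TB in the buffer is then trivially in range.  */
        uintptr_t buf = 0x40000000;
        size_t size = 16 * 1024 * 1024;
        uintptr_t prologue = buf + size - 1024;

        printf("first TB -> prologue reachable: %d\n",
               in_direct_branch_range(buf, prologue));
        return 0;
    }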
As a bonus, we get to avoid marking a page of the main executable's
data segment as executable.

Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 exec.c    | 30 +++++++++++-------------------
 tcg/tcg.h |  2 +-
 2 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/exec.c b/exec.c
index 5e33a3d..8958b28 100644
--- a/exec.c
+++ b/exec.c
@@ -86,22 +86,7 @@ static int nb_tbs;
 /* any access to the tbs or the page table must use this lock */
 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
 
-#if defined(__arm__) || defined(__sparc__)
-/* The prologue must be reachable with a direct jump.  ARM and Sparc64
-   have limited branch ranges (possibly also PPC) so place it in a
-   section close to code segment. */
-#define code_gen_section                                \
-    __attribute__((__section__(".gen_code")))           \
-    __attribute__((aligned (32)))
-#elif defined(_WIN32) && !defined(_WIN64)
-#define code_gen_section                                \
-    __attribute__((aligned (16)))
-#else
-#define code_gen_section                                \
-    __attribute__((aligned (32)))
-#endif
-
-uint8_t code_gen_prologue[1024] code_gen_section;
+uint8_t *code_gen_prologue;
 static uint8_t *code_gen_buffer;
 static size_t code_gen_buffer_size;
 /* threshold to flush the translated code buffer */
@@ -221,7 +206,7 @@ static int tb_flush_count;
 static int tb_phys_invalidate_count;
 
 #ifdef _WIN32
-static void map_exec(void *addr, long size)
+static inline void map_exec(void *addr, long size)
 {
     DWORD old_protect;
     VirtualProtect(addr, size,
@@ -229,7 +214,7 @@ static void map_exec(void *addr, long size)
 
 }
 #else
-static void map_exec(void *addr, long size)
+static inline void map_exec(void *addr, long size)
 {
     unsigned long start, end, page_size;
 
@@ -621,7 +606,14 @@ static inline void code_gen_alloc(size_t tb_size)
         exit(1);
     }
 
-    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
+    /* Steal room for the prologue at the end of the buffer.  This ensures
+       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
+       from TB's to the prologue are going to be in range.  It also means
+       that we don't need to mark (additional) portions of the data segment
+       as executable.  */
+    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
+    code_gen_buffer_size -= 1024;
+
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
     code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 7bafe0e..45e94f5 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -616,7 +616,7 @@ TCGv_i64 tcg_const_i64(int64_t val);
 TCGv_i32 tcg_const_local_i32(int32_t val);
 TCGv_i64 tcg_const_local_i64(int64_t val);
 
-extern uint8_t code_gen_prologue[];
+extern uint8_t *code_gen_prologue;
 
 /* TCG targets may use a different definition of tcg_qemu_tb_exec.  */
 #if !defined(tcg_qemu_tb_exec)
-- 
1.7.11.7
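A note for context, not part of the patch: the POSIX map_exec() body is
mostly elided by the hunks above.  The pattern it implements is to widen
[addr, addr + size) out to whole pages and mprotect() them RWX.  A
self-contained sketch of that pattern (a reconstruction for review
purposes, not the exact exec.c code; error handling added here):

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Sketch of the POSIX map_exec() pattern: round the requested
       range out to page boundaries, then request RWX access.  */
    static void map_exec_sketch(void *addr, long size)
    {
        unsigned long start, end, page_size;

        page_size = getpagesize();
        start = (unsigned long)addr & ~(page_size - 1);    /* round down */

        end = (unsigned long)addr + size;
        end = (end + page_size - 1) & ~(page_size - 1);    /* round up */

        if (mprotect((void *)start, end - start,
                     PROT_READ | PROT_WRITE | PROT_EXEC) != 0) {
            perror("mprotect");
            abort();
        }
    }

With the prologue now carved out of code_gen_buffer, the single
executable mapping that already covers the buffer covers the prologue
too, which is where the "no extra executable data page" bonus in the
commit message comes from.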