When resolving data flow we cannot emit reload instructions at the end of a basic block, because the register we are reloading into might already be allocated to another interval at the end of that block. In particular, it may be used by the branch instruction that ends the basic block (see tableswitch).

The solution is to put these invasive moves (mem -> reg) into per-CFG-edge resolution blocks. These blocks are emitted after all other blocks.

Signed-off-by: Tomek Grabiec <tgrab...@gmail.com>
---
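The idea, in rough outline and with made-up types (struct reload, struct edge_block, struct block and emit_resolution_blocks below are illustrative only, not the structures this patch adds): every outgoing CFG edge owns a list of reload moves; after all ordinary blocks have been emitted, each non-empty per-edge block is emitted, performing its reloads and then jumping to the real successor, while branches taking that edge are backpatched to the per-edge block's address.

#include <stdio.h>

/* Toy types only -- not the structures added by this patch. */
struct reload {				/* one mem -> reg move queued on an edge */
	int slot, reg;
	struct reload *next;
};

struct edge_block {			/* per-CFG-edge resolution block */
	struct reload *reloads;		/* moves to perform when taking the edge */
	long addr;			/* filled in once the block is emitted */
};

struct block {
	int id, nr_succ;
	struct block *succ[2];
	struct edge_block edge[2];	/* one resolution block per successor */
};

/*
 * Runs after all ordinary blocks have been emitted: each non-empty
 * resolution block performs its reloads and then jumps to the real
 * successor.  Branches taking the edge get backpatched to edge[i].addr,
 * so a reload can never clobber a register the branch itself still uses.
 */
static void emit_resolution_blocks(struct block *bb, long *pc)
{
	for (int i = 0; i < bb->nr_succ; i++) {
		struct edge_block *e = &bb->edge[i];

		if (!e->reloads)
			continue;		/* nothing to resolve on this edge */

		e->addr = (*pc)++;		/* stand-in for buffer_offset() */
		for (struct reload *r = e->reloads; r; r = r->next)
			printf("%ld: mov [slot %d] -> r%d\n", e->addr, r->slot, r->reg);
		printf("%ld: jmp block %d\n", e->addr, bb->succ[i]->id);
	}
}

int main(void)
{
	struct reload r = { .slot = 3, .reg = 1, .next = NULL };
	struct block b1 = { .id = 1 };
	struct block b0 = { .id = 0, .nr_succ = 1, .succ = { &b1 },
			    .edge = { { .reloads = &r } } };
	long pc = 100;

	emit_resolution_blocks(&b0, &pc);	/* prints one reload and the jump */
	return 0;
}

In the patch itself the per-edge lists live in struct resolvation_block, branches are redirected through the backpatch_insns list, and tableswitch lookup tables are patched the same way.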
 arch/x86/emit-code.c                             |   59 ++++++++++++--------
 arch/x86/include/arch/instruction.h              |    5 ++
 include/jit/basic-block.h                        |   13 +++++
 include/jit/emit-code.h                          |    2 +-
 include/jit/statement.h                          |    5 ++-
 jit/basic-block.c                                |   24 ++++++++
 jit/compiler.c                                   |    2 +-
 jit/emit.c                                       |   62 ++++++++++++++++++----
 jit/spill-reload.c                               |   37 +++++++++++--
 jit/statement.c                                  |    3 +
 jit/switch-bc.c                                  |   10 ++--
 regression/jvm/RegisterAllocatorTortureTest.java |   25 +++++++++
 12 files changed, 200 insertions(+), 47 deletions(-)

diff --git a/arch/x86/emit-code.c b/arch/x86/emit-code.c
index a59523e..48aaf1e 100644
--- a/arch/x86/emit-code.c
+++ b/arch/x86/emit-code.c
@@ -209,18 +209,27 @@ static long branch_rel_addr(struct insn *insn, unsigned long target_offset)
 	return ret;
 }
 
-static void __emit_branch(struct buffer *buf, unsigned char prefix,
-			  unsigned char opc, struct insn *insn)
+static void __emit_branch(struct buffer *buf, struct basic_block *bb,
+			  unsigned char prefix, unsigned char opc, struct insn *insn)
 {
 	struct basic_block *target_bb;
 	long addr = 0;
+	int idx;
 
 	if (prefix)
 		insn->escaped = true;
 
 	target_bb = insn->operand.branch_target;
 
-	if (target_bb->is_emitted) {
+	if (!bb)
+		idx = -1;
+	else
+		idx = bb_lookup_successor_index(bb, target_bb);
+
+	if (idx >= 0 && branch_needs_resolvation_block(bb, idx)) {
+		list_add(&insn->branch_list_node,
+			 &bb->resolvation_blocks[idx].backpatch_insns);
+	} else if (target_bb->is_emitted) {
 		struct insn *target_insn =
 			list_first_entry(&target_bb->insn_list, struct insn,
 					 insn_list_node);
@@ -232,39 +241,39 @@ static void __emit_branch(struct buffer *buf, unsigned char prefix,
 	emit_branch_rel(buf, prefix, opc, addr);
 }
 
-static void emit_je_branch(struct buffer *buf, struct insn *insn)
+static void emit_je_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-	__emit_branch(buf, 0x0f, 0x84, insn);
+	__emit_branch(buf, bb, 0x0f, 0x84, insn);
 }
 
-static void emit_jne_branch(struct buffer *buf, struct insn *insn)
+static void emit_jne_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-	__emit_branch(buf, 0x0f, 0x85, insn);
+	__emit_branch(buf, bb, 0x0f, 0x85, insn);
 }
 
-static void emit_jge_branch(struct buffer *buf, struct insn *insn)
+static void emit_jge_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-	__emit_branch(buf, 0x0f, 0x8d, insn);
+	__emit_branch(buf, bb, 0x0f, 0x8d, insn);
 }
 
-static void emit_jg_branch(struct buffer *buf, struct insn *insn)
+static void emit_jg_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-	__emit_branch(buf, 0x0f, 0x8f, insn);
+	__emit_branch(buf, bb, 0x0f, 0x8f, insn);
 }
 
-static void emit_jle_branch(struct buffer *buf, struct insn *insn)
+static void emit_jle_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-	__emit_branch(buf, 0x0f, 0x8e, insn);
+	__emit_branch(buf, bb, 0x0f, 0x8e, insn);
 }
 
-static void emit_jl_branch(struct buffer *buf, struct insn *insn)
+static void emit_jl_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-	__emit_branch(buf, 0x0f, 0x8c, insn);
+	__emit_branch(buf, bb, 0x0f, 0x8c, insn);
 }
 
-static void emit_jmp_branch(struct buffer *buf, struct insn *insn)
+static void emit_jmp_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-	__emit_branch(buf, 0x00, 0xe9, insn);
+	__emit_branch(buf, bb, 0x00, 0xe9, insn);
 }
 
 void emit_lock(struct buffer *buf, struct vm_object *obj)
@@ -2788,15 +2797,17 @@ static void emit_two_operands(struct emitter *emitter, struct buffer *buf, struc
 	emit(buf, &insn->src, &insn->dest);
 }
 
-typedef void (*emit_branch_fn) (struct buffer *, struct insn *);
+typedef void (*emit_branch_fn) (struct buffer *, struct basic_block *, struct insn *);
 
-static void emit_branch(struct emitter *emitter, struct buffer *buf, struct insn *insn)
+static void emit_branch(struct emitter *emitter, struct buffer *buf,
+			struct basic_block *bb, struct insn *insn)
 {
 	emit_branch_fn emit = emitter->emit_fn;
 
-	emit(buf, insn);
+	emit(buf, bb, insn);
 }
 
-static void __emit_insn(struct buffer *buf, struct insn *insn)
+static void __emit_insn(struct buffer *buf, struct basic_block *bb,
+			struct insn *insn)
 {
 	struct emitter *emitter;
@@ -2812,7 +2823,7 @@ static void __emit_insn(struct buffer *buf, struct insn *insn)
 		emit_two_operands(emitter, buf, insn);
 		break;
 	case BRANCH:
-		emit_branch(emitter, buf, insn);
+		emit_branch(emitter, buf, bb, insn);
 		break;
 	default:
 		printf("Oops. No emitter for 0x%x.\n", insn->type);
@@ -2820,11 +2831,11 @@ static void __emit_insn(struct buffer *buf, struct insn *insn)
 	};
 }
 
-void emit_insn(struct buffer *buf, struct insn *insn)
+void emit_insn(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
 	insn->mach_offset = buffer_offset(buf);
 
-	__emit_insn(buf, insn);
+	__emit_insn(buf, bb, insn);
 }
 
 void emit_nop(struct buffer *buf)
diff --git a/arch/x86/include/arch/instruction.h b/arch/x86/include/arch/instruction.h
index feb8dbe..015f50f 100644
--- a/arch/x86/include/arch/instruction.h
+++ b/arch/x86/include/arch/instruction.h
@@ -284,6 +284,11 @@ pop_slot_insn(struct stack_slot *to)
 	return memlocal_insn(INSN_POP_MEMLOCAL, to);
 }
 
+static inline struct insn *jump_insn(struct basic_block *bb)
+{
+	return branch_insn(INSN_JMP_BRANCH, bb);
+}
+
 struct insn *alloc_insn(enum insn_type);
 void free_insn(struct insn *);
 
diff --git a/include/jit/basic-block.h b/include/jit/basic-block.h
index 28ca6b2..3e4cbd9 100644
--- a/include/jit/basic-block.h
+++ b/include/jit/basic-block.h
@@ -8,6 +8,13 @@ struct compilation_unit;
 struct insn;
 struct statement;
 
+struct resolvation_block {
+	unsigned long addr;
+
+	struct list_head insns;
+	struct list_head backpatch_insns;
+};
+
 struct basic_block {
 	struct compilation_unit *b_parent;
 	unsigned long start;
@@ -30,6 +37,9 @@ struct basic_block {
 	struct expression **mimic_stack_expr;
 	unsigned long mach_offset;
 
+	/* Contains data flow resultion blocks for each outgoing CFG edge. */
+	struct resolvation_block *resolvation_blocks;
+
 	/* The mimic stack is used to simulate JVM operand stack at compile-time.
 	   See Section 2.2 ("Lazy code selection") of the paper
 	   "Fast, Effective Code Generation in a Just-In-Time Java Compiler" by
@@ -76,6 +86,9 @@ int bb_add_successor(struct basic_block *, struct basic_block *);
 int bb_add_mimic_stack_expr(struct basic_block *, struct expression *);
 struct statement *bb_remove_last_stmt(struct basic_block *bb);
 unsigned char *bb_native_ptr(struct basic_block *bb);
+void resolvation_block_init(struct resolvation_block *block);
+bool branch_needs_resolvation_block(struct basic_block *from, int idx);
+int bb_lookup_successor_index(struct basic_block *from, struct basic_block *to);
 
 #define for_each_basic_block(bb, bb_list) list_for_each_entry(bb, bb_list, bb_list_node)
 #define for_each_basic_block_reverse(bb, bb_list) list_for_each_entry_reverse(bb, bb_list, bb_list_node)
diff --git a/include/jit/emit-code.h b/include/jit/emit-code.h
index f08bee6..d012c36 100644
--- a/include/jit/emit-code.h
+++ b/include/jit/emit-code.h
@@ -36,7 +36,7 @@ extern void emit_lock_this(struct buffer *);
 extern void emit_unlock(struct buffer *, struct vm_object *);
 extern void emit_unlock_this(struct buffer *);
 extern void emit_body(struct basic_block *, struct buffer *);
-extern void emit_insn(struct buffer *buf, struct insn *insn);
+extern void emit_insn(struct buffer *, struct basic_block *, struct insn *);
 extern void emit_nop(struct buffer *buf);
 extern void backpatch_branch_target(struct buffer *buf, struct insn *insn,
 				    unsigned long target_offset);
diff --git a/include/jit/statement.h b/include/jit/statement.h
index ae4d1fb..8a4694c 100644
--- a/include/jit/statement.h
+++ b/include/jit/statement.h
@@ -29,6 +29,9 @@ struct tableswitch {
 	uint32_t low;
 	uint32_t high;
 
+	/* basic block containing this tableswitch. */
+	struct basic_block *src;
+
 	union {
 		struct basic_block **bb_lookup_table;
 
@@ -96,7 +99,7 @@ struct statement *alloc_statement(enum statement_type);
 void free_statement(struct statement *);
 int stmt_nr_kids(struct statement *);
 
-struct tableswitch *alloc_tableswitch(struct tableswitch_info *, struct compilation_unit *, unsigned long);
+struct tableswitch *alloc_tableswitch(struct tableswitch_info *, struct compilation_unit *, struct basic_block *, unsigned long);
 void free_tableswitch(struct tableswitch *);
 
 struct statement *if_stmt(struct basic_block *, enum vm_type, enum binary_operator, struct expression *, struct expression *);
diff --git a/jit/basic-block.c b/jit/basic-block.c
index f46b632..cc9b038 100644
--- a/jit/basic-block.c
+++ b/jit/basic-block.c
@@ -84,6 +84,10 @@ void free_basic_block(struct basic_block *bb)
 	free(bb->def_set);
 	free(bb->live_in_set);
 	free(bb->live_out_set);
+
+	if (bb->resolvation_blocks)
+		free(bb->resolvation_blocks);
+
 	free(bb);
 }
 
@@ -230,3 +234,23 @@ unsigned char *bb_native_ptr(struct basic_block *bb)
 {
 	return buffer_ptr(bb->b_parent->objcode) + bb->mach_offset;
 }
+
+void resolvation_block_init(struct resolvation_block *block)
+{
+	INIT_LIST_HEAD(&block->insns);
+	INIT_LIST_HEAD(&block->backpatch_insns);
+}
+
+int bb_lookup_successor_index(struct basic_block *from, struct basic_block *to)
+{
+	for (unsigned long i = 0; i < from->nr_successors; i++)
+		if (from->successors[i] == to)
+			return i;
+
+	return -1;
+}
+
+bool branch_needs_resolvation_block(struct basic_block *from, int idx)
+{
+	return !list_is_empty(&from->resolvation_blocks[idx].insns);
+}
diff --git a/jit/compiler.c b/jit/compiler.c
index 2f94e2f..26a06d8 100644
--- a/jit/compiler.c
+++ b/jit/compiler.c
@@ -104,7 +104,7 @@ int compile(struct compilation_unit *cu)
 	trace_regalloc(cu);
 
 	assert(all_insn_have_bytecode_offset(cu));
-
+	trace_flush();
 	err = emit_machine_code(cu);
 	if (err)
 		goto out;
diff --git a/jit/emit.c b/jit/emit.c
index 1a537a9..74e475e 100644
--- a/jit/emit.c
+++ b/jit/emit.c
@@ -55,19 +55,33 @@ static void backpatch_branches(struct buffer *buf,
 	}
 }
 
+static void backpatch_tableswitch(struct tableswitch *table)
+{
+	int count;
+
+	count = table->high - table->low + 1;
+
+	for (int i = 0; i < count; i++) {
+		int idx = bb_lookup_successor_index(table->src,
+						    table->bb_lookup_table[i]);
+
+		if (branch_needs_resolvation_block(table->src, idx)) {
+			table->lookup_table[i] =
+				(void *)table->src->resolvation_blocks[idx].addr;
+		} else {
+			table->lookup_table[i] =
+				bb_native_ptr(table->bb_lookup_table[i]);
+		}
+	}
+}
+
 static void backpatch_tableswitch_targets(struct compilation_unit *cu)
 {
 	struct tableswitch *this;
 
 	list_for_each_entry(this, &cu->tableswitch_list, list_node) {
-		int count;
-
-		count = this->high - this->low + 1;
-
-		for (int i = 0; i < count; i++)
-			this->lookup_table[i] =
-				bb_native_ptr(this->bb_lookup_table[i]);
+		backpatch_tableswitch(this);
 	}
 }
 
@@ -81,13 +95,37 @@ void emit_body(struct basic_block *bb, struct buffer *buf)
 	backpatch_branches(buf, &bb->backpatch_insns, bb->mach_offset);
 
 	for_each_insn(insn, &bb->insn_list) {
-		emit_insn(buf, insn);
+		emit_insn(buf, bb, insn);
 	}
 
 	if (opt_trace_machine_code)
 		emit_nop(buf);
 }
 
+static void emit_resolvation_blocks(struct basic_block *bb, struct buffer *buf)
+{
+	for (unsigned int i = 0; i < bb->nr_successors; i++) {
+		struct resolvation_block *block;
+		unsigned long mach_offset;
+		struct insn *insn;
+
+		mach_offset = buffer_offset(buf);
+		block = &bb->resolvation_blocks[i];
+		block->addr = (unsigned long) buffer_ptr(buf) + mach_offset;
+
+		if (list_is_empty(&block->insns))
+			continue;
+
+		backpatch_branches(buf, &block->backpatch_insns, mach_offset);
+
+		for_each_insn(insn, &block->insns) {
+			emit_insn(buf, NULL, insn);
+		}
+
+		emit_insn(buf, NULL, jump_insn(bb->successors[i]));
+	}
+}
+
 static struct buffer_operations exec_buf_ops = {
 	.expand = NULL,
 	.free = NULL,
@@ -121,8 +159,6 @@ int emit_machine_code(struct compilation_unit *cu)
 	for_each_basic_block(bb, &cu->bb_list)
 		emit_body(bb, cu->objcode);
 
-	backpatch_tableswitch_targets(cu);
-
 	emit_body(cu->exit_bb, cu->objcode);
 	if (method_is_synchronized(cu->method))
 		emit_monitorexit(cu);
@@ -135,6 +171,12 @@ int emit_machine_code(struct compilation_unit *cu)
 	cu->unwind_past_unlock_ptr = buffer_current(cu->objcode);
 	emit_unwind(cu->objcode);
 
+	for_each_basic_block(bb, &cu->bb_list) {
+		emit_resolvation_blocks(bb, cu->objcode);
+	}
+
+	backpatch_tableswitch_targets(cu);
+
 	jit_text_reserve(buffer_offset(cu->objcode));
 	jit_text_unlock();
 
diff --git a/jit/spill-reload.c b/jit/spill-reload.c
index 9ce571f..d607281 100644
--- a/jit/spill-reload.c
+++ b/jit/spill-reload.c
@@ -34,7 +34,9 @@
 #include "lib/buffer.h"
 
 #include "vm/die.h"
+#include "vm/trace.h"
 
+#include <malloc.h>
 #include <string.h>
 #include <errno.h>
 #include <stdio.h>
@@ -205,9 +207,24 @@ static void insert_mov_insns(struct compilation_unit *cu,
 			insert_copy_slot_insn(mappings[i].to, cu, slots[i],
 					to_it->spill_parent->spill_slot,
 					spill_at_insn, reload_at_insn);
-		} else {
-			insert_reload_insn(to_it, cu, slots[i], spill_at_insn);
+			continue;
 		}
+
+		/*
+		 * Reload instructions should be inserted to per-edge
+		 * resolvation blocks because register we are reloading
+		 * to might be already allocated to another interval
+		 * at this position.  Especially it can be used in a
+		 * branch instruction which ends @from_bb basic block
+		 * (see tableswitch).
+		 */
+		struct insn *reload;
+		int idx;
+
+		idx = bb_lookup_successor_index(from_bb, to_bb);
+		reload = reload_insn(slots[i], &to_it->spill_reload_reg);
+		list_add_tail(&reload->insn_list_node,
+			&from_bb->resolvation_blocks[idx].insns);
 	}
 }
 
@@ -267,23 +284,31 @@ static void maybe_add_mapping(struct live_interval_mapping *mappings,
 	(*nr_mapped)++;
 }
 
-static void resolve_data_flow(struct compilation_unit *cu)
+static int resolve_data_flow(struct compilation_unit *cu)
 {
 	struct basic_block *from;
 	unsigned long vreg;
 
 	/*
-	 * This implements the data flow resolution algorithm described in
+	 * This implements the data flow resolvation algorithm described in
 	 * Section 5.8 ("Resolving the Data Flow") of Wimmer 2004.
 	 */
 	for_each_basic_block(from, &cu->bb_list) {
+		unsigned long rb_size;
 		unsigned int i;
 
+		rb_size = sizeof(struct resolvation_block) * from->nr_successors;
+		from->resolvation_blocks = malloc(rb_size);
+		if (!from->resolvation_blocks)
+			return -ENOMEM;
+
 		for (i = 0; i < from->nr_successors; i++) {
 			struct live_interval_mapping mappings[cu->nr_vregs];
 			struct basic_block *to;
			int nr_mapped = 0;
 
+			resolvation_block_init(&from->resolvation_blocks[i]);
+
 			if (cu->nr_vregs == 0)
 				continue;
 
@@ -300,6 +325,8 @@ static void resolve_data_flow(struct compilation_unit *cu)
 			insert_mov_insns(cu, mappings, nr_mapped, from, to);
 		}
 	}
+
+	return 0;
 }
 
 int insert_spill_reload_insns(struct compilation_unit *cu)
@@ -321,7 +348,7 @@ int insert_spill_reload_insns(struct compilation_unit *cu)
 	 * Make sure intervals spilled across basic block boundaries will be
 	 * reloaded correctly.
 	 */
-	resolve_data_flow(cu);
+	err = resolve_data_flow(cu);
 
 	return err;
 }
diff --git a/jit/statement.c b/jit/statement.c
index 7547b5f..635e0e2 100644
--- a/jit/statement.c
+++ b/jit/statement.c
@@ -69,6 +69,7 @@ void free_statement(struct statement *stmt)
 
 struct tableswitch *alloc_tableswitch(struct tableswitch_info *info,
 				      struct compilation_unit *cu,
+				      struct basic_block *bb,
 				      unsigned long offset)
 {
 	struct tableswitch *table;
@@ -77,6 +78,8 @@ struct tableswitch *alloc_tableswitch(struct tableswitch_info *info,
 	if (!table)
 		return NULL;
 
+	table->src = bb;
+
 	table->low = info->low;
 	table->high = info->high;
 
diff --git a/jit/switch-bc.c b/jit/switch-bc.c
index a2a9b84..ce7cd44 100644
--- a/jit/switch-bc.c
+++ b/jit/switch-bc.c
@@ -75,10 +75,6 @@ int convert_tableswitch(struct parse_context *ctx)
 	get_tableswitch_info(ctx->code, ctx->offset, &info);
 	ctx->buffer->pos += info.insn_size;
 
-	table = alloc_tableswitch(&info, ctx->cu, ctx->offset);
-	if (!table)
-		return -ENOMEM;
-
 	default_bb = find_bb(ctx->cu, ctx->offset + info.default_target);
 	if (!default_bb)
 		goto fail_default_bb;
@@ -109,6 +105,10 @@ int convert_tableswitch(struct parse_context *ctx)
 		bb_add_successor(b2, target_bb);
 	}
 
+	table = alloc_tableswitch(&info, ctx->cu, b2, ctx->offset);
+	if (!table)
+		return -ENOMEM;
+
 	struct statement *if_lesser_stmt;
 	struct statement *if_greater_stmt;
 	struct statement *stmt;
@@ -143,7 +143,7 @@ int convert_tableswitch(struct parse_context *ctx)
  fail_greater_stmt:
 	free_statement(if_lesser_stmt);
  fail_lesser_stmt:
- fail_default_bb:
 	free_tableswitch(table);
+ fail_default_bb:
 	return -1;
 }
diff --git a/regression/jvm/RegisterAllocatorTortureTest.java b/regression/jvm/RegisterAllocatorTortureTest.java
index 91682a3..8660b40 100644
--- a/regression/jvm/RegisterAllocatorTortureTest.java
+++ b/regression/jvm/RegisterAllocatorTortureTest.java
@@ -77,9 +77,34 @@ public class RegisterAllocatorTortureTest extends TestCase {
         doTestIntervalSplitOnRegisterAllocatorPressure(obj, 0);
     }
 
+    static class DataFlowTestClass {
+        private void test2(int a, int b, int c, int d, int e, int f, int g, int h, int i) {
+            assertEquals(0, a);
+            assertEquals(1, b);
+            assertEquals(2, c);
+            assertEquals(3, d);
+            assertEquals(4, e);
+            assertEquals(5, f);
+            assertEquals(6, g);
+            assertEquals(1, h);
+            assertEquals(7, i);
+        }
+
+        public void test(int a, int b, int c, int d, int e, int f, int g, int h){
+            test2(a, b, c, d, e, f, g, ((f != 0) ? 1 : 0), h);
+        }
+    };
+
+    public static void testDataFlowResolution() {
+        DataFlowTestClass x = new DataFlowTestClass();
+
+        x.test(0, 1, 2, 3, 4, 5, 6, 7);
+    }
+
     public static void main(String[] args) {
         testIntegerBigExpression();
         testComplexRegisterAllocatorPressure();
         testIntervalSplitOnRegisterAllocatorPressure();
+        testDataFlowResolution();
     }
 }
-- 
1.6.0.6