When resolving data flow we cannot emit reload instructions at the
end of a basic block, because the register we are reloading into
might already be allocated to another interval at the end of that
block. In particular, it can be used by the branch instruction that
ends the basic block (see tableswitch).

The solution is to put these invasive moves (mem -> reg) into
per-CFG-edge resolution blocks, which are emitted after all other
basic blocks.
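
A condensed sketch of the scheme (simplified from the jit/spill-reload.c
and jit/emit.c changes below; declarations and error handling omitted):

    /* Data flow resolution: record the reload on the CFG edge
     * from_bb -> to_bb instead of emitting it at the end of from_bb. */
    idx = bb_lookup_successor_index(from_bb, to_bb);
    reload = reload_insn(slots[i], &to_it->spill_reload_reg);
    list_add_tail(&reload->insn_list_node,
                  &from_bb->resolvation_blocks[idx].insns);

    /* Emission: once all basic blocks have been written out, emit each
     * non-empty per-edge block followed by a jump to the real successor.
     * Branches into that edge are backpatched to point at the block. */
    for (i = 0; i < bb->nr_successors; i++) {
            struct resolvation_block *block = &bb->resolvation_blocks[i];

            block->addr = (unsigned long) buffer_ptr(buf) + buffer_offset(buf);
            if (list_is_empty(&block->insns))
                    continue;

            backpatch_branches(buf, &block->backpatch_insns, buffer_offset(buf));
            for_each_insn(insn, &block->insns)
                    emit_insn(buf, NULL, insn);

            emit_insn(buf, NULL, jump_insn(bb->successors[i]));
    }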

Signed-off-by: Tomek Grabiec <tgrab...@gmail.com>
---
 arch/x86/emit-code.c                             |   59 ++++++++++++--------
 arch/x86/include/arch/instruction.h              |    5 ++
 include/jit/basic-block.h                        |   13 +++++
 include/jit/emit-code.h                          |    2 +-
 include/jit/statement.h                          |    5 ++-
 jit/basic-block.c                                |   24 ++++++++
 jit/compiler.c                                   |    2 +-
 jit/emit.c                                       |   62 ++++++++++++++++++----
 jit/spill-reload.c                               |   37 +++++++++++--
 jit/statement.c                                  |    3 +
 jit/switch-bc.c                                  |   10 ++--
 regression/jvm/RegisterAllocatorTortureTest.java |   25 +++++++++
 12 files changed, 200 insertions(+), 47 deletions(-)

diff --git a/arch/x86/emit-code.c b/arch/x86/emit-code.c
index a59523e..48aaf1e 100644
--- a/arch/x86/emit-code.c
+++ b/arch/x86/emit-code.c
@@ -209,18 +209,27 @@ static long branch_rel_addr(struct insn *insn, unsigned long target_offset)
        return ret;
 }
 
-static void __emit_branch(struct buffer *buf, unsigned char prefix,
-                         unsigned char opc, struct insn *insn)
+static void __emit_branch(struct buffer *buf, struct basic_block *bb,
+               unsigned char prefix, unsigned char opc, struct insn *insn)
 {
        struct basic_block *target_bb;
        long addr = 0;
+       int idx;
 
        if (prefix)
                insn->escaped = true;
 
        target_bb = insn->operand.branch_target;
 
-       if (target_bb->is_emitted) {
+       if (!bb)
+               idx = -1;
+       else
+               idx = bb_lookup_successor_index(bb, target_bb);
+
+       if (idx >= 0 && branch_needs_resolvation_block(bb, idx)) {
+               list_add(&insn->branch_list_node,
+                        &bb->resolvation_blocks[idx].backpatch_insns);
+       } else if (target_bb->is_emitted) {
                struct insn *target_insn =
                    list_first_entry(&target_bb->insn_list, struct insn,
                               insn_list_node);
@@ -232,39 +241,39 @@ static void __emit_branch(struct buffer *buf, unsigned char prefix,
        emit_branch_rel(buf, prefix, opc, addr);
 }
 
-static void emit_je_branch(struct buffer *buf, struct insn *insn)
+static void emit_je_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-       __emit_branch(buf, 0x0f, 0x84, insn);
+       __emit_branch(buf, bb, 0x0f, 0x84, insn);
 }
 
-static void emit_jne_branch(struct buffer *buf, struct insn *insn)
+static void emit_jne_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-       __emit_branch(buf, 0x0f, 0x85, insn);
+       __emit_branch(buf, bb, 0x0f, 0x85, insn);
 }
 
-static void emit_jge_branch(struct buffer *buf, struct insn *insn)
+static void emit_jge_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-       __emit_branch(buf, 0x0f, 0x8d, insn);
+       __emit_branch(buf, bb, 0x0f, 0x8d, insn);
 }
 
-static void emit_jg_branch(struct buffer *buf, struct insn *insn)
+static void emit_jg_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-       __emit_branch(buf, 0x0f, 0x8f, insn);
+       __emit_branch(buf, bb, 0x0f, 0x8f, insn);
 }
 
-static void emit_jle_branch(struct buffer *buf, struct insn *insn)
+static void emit_jle_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-       __emit_branch(buf, 0x0f, 0x8e, insn);
+       __emit_branch(buf, bb, 0x0f, 0x8e, insn);
 }
 
-static void emit_jl_branch(struct buffer *buf, struct insn *insn)
+static void emit_jl_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-       __emit_branch(buf, 0x0f, 0x8c, insn);
+       __emit_branch(buf, bb, 0x0f, 0x8c, insn);
 }
 
-static void emit_jmp_branch(struct buffer *buf, struct insn *insn)
+static void emit_jmp_branch(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
-       __emit_branch(buf, 0x00, 0xe9, insn);
+       __emit_branch(buf, bb, 0x00, 0xe9, insn);
 }
 
 void emit_lock(struct buffer *buf, struct vm_object *obj)
@@ -2788,15 +2797,17 @@ static void emit_two_operands(struct emitter *emitter, struct buffer *buf, struc
        emit(buf, &insn->src, &insn->dest);
 }
 
-typedef void (*emit_branch_fn) (struct buffer *, struct insn *);
+typedef void (*emit_branch_fn) (struct buffer *, struct basic_block *, struct insn *);
 
-static void emit_branch(struct emitter *emitter, struct buffer *buf, struct insn *insn)
+static void emit_branch(struct emitter *emitter, struct buffer *buf,
+                       struct basic_block *bb, struct insn *insn)
 {
        emit_branch_fn emit = emitter->emit_fn;
-       emit(buf, insn);
+       emit(buf, bb, insn);
 }
 
-static void __emit_insn(struct buffer *buf, struct insn *insn)
+static void __emit_insn(struct buffer *buf, struct basic_block *bb,
+                       struct insn *insn)
 {
        struct emitter *emitter;
 
@@ -2812,7 +2823,7 @@ static void __emit_insn(struct buffer *buf, struct insn *insn)
                emit_two_operands(emitter, buf, insn);
                break;
        case BRANCH:
-               emit_branch(emitter, buf, insn);
+               emit_branch(emitter, buf, bb, insn);
                break;
        default:
                printf("Oops. No emitter for 0x%x.\n", insn->type);
@@ -2820,11 +2831,11 @@ static void __emit_insn(struct buffer *buf, struct insn *insn)
        };
 }
 
-void emit_insn(struct buffer *buf, struct insn *insn)
+void emit_insn(struct buffer *buf, struct basic_block *bb, struct insn *insn)
 {
        insn->mach_offset = buffer_offset(buf);
 
-       __emit_insn(buf, insn);
+       __emit_insn(buf, bb, insn);
 }
 
 void emit_nop(struct buffer *buf)
diff --git a/arch/x86/include/arch/instruction.h b/arch/x86/include/arch/instruction.h
index feb8dbe..015f50f 100644
--- a/arch/x86/include/arch/instruction.h
+++ b/arch/x86/include/arch/instruction.h
@@ -284,6 +284,11 @@ pop_slot_insn(struct stack_slot *to)
        return memlocal_insn(INSN_POP_MEMLOCAL, to);
 }
 
+static inline struct insn *jump_insn(struct basic_block *bb)
+{
+       return branch_insn(INSN_JMP_BRANCH, bb);
+}
+
 struct insn *alloc_insn(enum insn_type);
 void free_insn(struct insn *);
 
diff --git a/include/jit/basic-block.h b/include/jit/basic-block.h
index 28ca6b2..3e4cbd9 100644
--- a/include/jit/basic-block.h
+++ b/include/jit/basic-block.h
@@ -8,6 +8,13 @@ struct compilation_unit;
 struct insn;
 struct statement;
 
+struct resolvation_block {
+       unsigned long addr;
+
+       struct list_head insns;
+       struct list_head backpatch_insns;
+};
+
 struct basic_block {
        struct compilation_unit *b_parent;
        unsigned long start;
@@ -30,6 +37,9 @@ struct basic_block {
        struct expression **mimic_stack_expr;
        unsigned long mach_offset;
 
+       /* Contains data flow resolution blocks for each outgoing CFG edge. */
+       struct resolvation_block *resolvation_blocks;
+
        /* The mimic stack is used to simulate JVM operand stack at
           compile-time.  See Section 2.2 ("Lazy code selection") of the paper
           "Fast, Effective Code Generation in a Just-In-Time Java Compiler" by
@@ -76,6 +86,9 @@ int bb_add_successor(struct basic_block *, struct basic_block *);
 int bb_add_mimic_stack_expr(struct basic_block *, struct expression *);
 struct statement *bb_remove_last_stmt(struct basic_block *bb);
 unsigned char *bb_native_ptr(struct basic_block *bb);
+void resolvation_block_init(struct resolvation_block *block);
+bool branch_needs_resolvation_block(struct basic_block *from, int idx);
+int bb_lookup_successor_index(struct basic_block *from, struct basic_block *to);
 
 #define for_each_basic_block(bb, bb_list) list_for_each_entry(bb, bb_list, bb_list_node)
 #define for_each_basic_block_reverse(bb, bb_list) list_for_each_entry_reverse(bb, bb_list, bb_list_node)
diff --git a/include/jit/emit-code.h b/include/jit/emit-code.h
index f08bee6..d012c36 100644
--- a/include/jit/emit-code.h
+++ b/include/jit/emit-code.h
@@ -36,7 +36,7 @@ extern void emit_lock_this(struct buffer *);
 extern void emit_unlock(struct buffer *, struct vm_object *);
 extern void emit_unlock_this(struct buffer *);
 extern void emit_body(struct basic_block *, struct buffer *);
-extern void emit_insn(struct buffer *buf, struct insn *insn);
+extern void emit_insn(struct buffer *, struct basic_block *, struct insn *);
 extern void emit_nop(struct buffer *buf);
 extern void backpatch_branch_target(struct buffer *buf, struct insn *insn,
                                    unsigned long target_offset);
diff --git a/include/jit/statement.h b/include/jit/statement.h
index ae4d1fb..8a4694c 100644
--- a/include/jit/statement.h
+++ b/include/jit/statement.h
@@ -29,6 +29,9 @@ struct tableswitch {
        uint32_t low;
        uint32_t high;
 
+       /* Basic block containing this tableswitch. */
+       struct basic_block *src;
+
        union {
                struct basic_block **bb_lookup_table;
 
@@ -96,7 +99,7 @@ struct statement *alloc_statement(enum statement_type);
 void free_statement(struct statement *);
 int stmt_nr_kids(struct statement *);
 
-struct tableswitch *alloc_tableswitch(struct tableswitch_info *, struct compilation_unit *, unsigned long);
+struct tableswitch *alloc_tableswitch(struct tableswitch_info *, struct compilation_unit *, struct basic_block *, unsigned long);
 void free_tableswitch(struct tableswitch *);
 struct statement *if_stmt(struct basic_block *, enum vm_type, enum binary_operator, struct expression *, struct expression *);
 
diff --git a/jit/basic-block.c b/jit/basic-block.c
index f46b632..cc9b038 100644
--- a/jit/basic-block.c
+++ b/jit/basic-block.c
@@ -84,6 +84,10 @@ void free_basic_block(struct basic_block *bb)
        free(bb->def_set);
        free(bb->live_in_set);
        free(bb->live_out_set);
+
+       if (bb->resolvation_blocks)
+               free(bb->resolvation_blocks);
+
        free(bb);
 }
 
@@ -230,3 +234,23 @@ unsigned char *bb_native_ptr(struct basic_block *bb)
 {
        return buffer_ptr(bb->b_parent->objcode) + bb->mach_offset;
 }
+
+void resolvation_block_init(struct resolvation_block *block)
+{
+       INIT_LIST_HEAD(&block->insns);
+       INIT_LIST_HEAD(&block->backpatch_insns);
+}
+
+int bb_lookup_successor_index(struct basic_block *from, struct basic_block *to)
+{
+       for (unsigned long i = 0; i < from->nr_successors; i++)
+               if (from->successors[i] == to)
+                       return i;
+
+       return -1;
+}
+
+bool branch_needs_resolvation_block(struct basic_block *from, int idx)
+{
+       return !list_is_empty(&from->resolvation_blocks[idx].insns);
+}
diff --git a/jit/compiler.c b/jit/compiler.c
index 2f94e2f..26a06d8 100644
--- a/jit/compiler.c
+++ b/jit/compiler.c
@@ -104,7 +104,7 @@ int compile(struct compilation_unit *cu)
                trace_regalloc(cu);
 
        assert(all_insn_have_bytecode_offset(cu));
-
+       trace_flush();
        err = emit_machine_code(cu);
        if (err)
                goto out;
diff --git a/jit/emit.c b/jit/emit.c
index 1a537a9..74e475e 100644
--- a/jit/emit.c
+++ b/jit/emit.c
@@ -55,19 +55,33 @@ static void backpatch_branches(struct buffer *buf,
        }
 }
 
+static void backpatch_tableswitch(struct tableswitch *table)
+{
+       int count;
+
+       count = table->high - table->low + 1;
+
+       for (int i = 0; i < count; i++) {
+               int idx = bb_lookup_successor_index(table->src,
+                                                   table->bb_lookup_table[i]);
+
+               if (branch_needs_resolvation_block(table->src, idx)) {
+                       table->lookup_table[i] =
+                               (void *)table->src->resolvation_blocks[idx].addr;
+               } else {
+                       table->lookup_table[i] =
+                               bb_native_ptr(table->bb_lookup_table[i]);
+               }
+       }
+}
+
 static void backpatch_tableswitch_targets(struct compilation_unit *cu)
 {
        struct tableswitch *this;
 
        list_for_each_entry(this, &cu->tableswitch_list, list_node)
        {
-               int count;
-
-               count = this->high - this->low + 1;
-
-               for (int i = 0; i < count; i++)
-                       this->lookup_table[i] =
-                               bb_native_ptr(this->bb_lookup_table[i]);
+               backpatch_tableswitch(this);
        }
 }
 
@@ -81,13 +95,37 @@ void emit_body(struct basic_block *bb, struct buffer *buf)
        backpatch_branches(buf, &bb->backpatch_insns, bb->mach_offset);
 
        for_each_insn(insn, &bb->insn_list) {
-               emit_insn(buf, insn);
+               emit_insn(buf, bb, insn);
        }
 
        if (opt_trace_machine_code)
                emit_nop(buf);
 }
 
+static void emit_resolvation_blocks(struct basic_block *bb, struct buffer *buf)
+{
+       for (unsigned int i = 0; i < bb->nr_successors; i++) {
+               struct resolvation_block *block;
+               unsigned long mach_offset;
+               struct insn *insn;
+
+               mach_offset = buffer_offset(buf);
+               block = &bb->resolvation_blocks[i];
+               block->addr = (unsigned long) buffer_ptr(buf) + mach_offset;
+
+               if (list_is_empty(&block->insns))
+                       continue;
+
+               backpatch_branches(buf, &block->backpatch_insns, mach_offset);
+
+               for_each_insn(insn, &block->insns) {
+                       emit_insn(buf, NULL, insn);
+               }
+
+               emit_insn(buf, NULL, jump_insn(bb->successors[i]));
+       }
+}
+
 static struct buffer_operations exec_buf_ops = {
        .expand = NULL,
        .free   = NULL,
@@ -121,8 +159,6 @@ int emit_machine_code(struct compilation_unit *cu)
        for_each_basic_block(bb, &cu->bb_list)
                emit_body(bb, cu->objcode);
 
-       backpatch_tableswitch_targets(cu);
-
        emit_body(cu->exit_bb, cu->objcode);
        if (method_is_synchronized(cu->method))
                emit_monitorexit(cu);
@@ -135,6 +171,12 @@ int emit_machine_code(struct compilation_unit *cu)
        cu->unwind_past_unlock_ptr = buffer_current(cu->objcode);
        emit_unwind(cu->objcode);
 
+       for_each_basic_block(bb, &cu->bb_list) {
+               emit_resolvation_blocks(bb, cu->objcode);
+       }
+
+       backpatch_tableswitch_targets(cu);
+
        jit_text_reserve(buffer_offset(cu->objcode));
        jit_text_unlock();
 
diff --git a/jit/spill-reload.c b/jit/spill-reload.c
index 9ce571f..d607281 100644
--- a/jit/spill-reload.c
+++ b/jit/spill-reload.c
@@ -34,7 +34,9 @@
 #include "lib/buffer.h"
 
 #include "vm/die.h"
+#include "vm/trace.h"
 
+#include <malloc.h>
 #include <string.h>
 #include <errno.h>
 #include <stdio.h>
@@ -205,9 +207,24 @@ static void insert_mov_insns(struct compilation_unit *cu,
                        insert_copy_slot_insn(mappings[i].to, cu, slots[i],
                                        to_it->spill_parent->spill_slot,
                                        spill_at_insn, reload_at_insn);
-               } else {
-                       insert_reload_insn(to_it, cu, slots[i], spill_at_insn);
+                       continue;
                }
+
+               /*
+                * Reload instructions should be inserted into per-edge
+                * resolvation blocks because the register we are
+                * reloading into might already be allocated to another
+                * interval at this position. In particular, it can be
+                * used by the branch instruction which ends the
+                * @from_bb basic block (see tableswitch).
+                */
+               struct insn *reload;
+               int idx;
+
+               idx = bb_lookup_successor_index(from_bb, to_bb);
+               reload = reload_insn(slots[i], &to_it->spill_reload_reg);
+               list_add_tail(&reload->insn_list_node,
+                             &from_bb->resolvation_blocks[idx].insns);
        }
 }
 
@@ -267,23 +284,31 @@ static void maybe_add_mapping(struct live_interval_mapping *mappings,
        (*nr_mapped)++;
 }
 
-static void resolve_data_flow(struct compilation_unit *cu)
+static int resolve_data_flow(struct compilation_unit *cu)
 {
        struct basic_block *from;
        unsigned long vreg;
 
        /*
-        * This implements the data flow resolution algorithm described in
+        * This implements the data flow resolvation algorithm described in
         * Section 5.8 ("Resolving the Data Flow") of Wimmer 2004.
         */
        for_each_basic_block(from, &cu->bb_list) {
+               unsigned long rb_size;
                unsigned int i;
 
+               rb_size = sizeof(struct resolvation_block) * from->nr_successors;
+               from->resolvation_blocks = malloc(rb_size);
+               if (!from->resolvation_blocks)
+                       return -ENOMEM;
+
                for (i = 0; i < from->nr_successors; i++) {
                        struct live_interval_mapping mappings[cu->nr_vregs];
                        struct basic_block *to;
                        int nr_mapped = 0;
 
+                       resolvation_block_init(&from->resolvation_blocks[i]);
+
                        if (cu->nr_vregs == 0)
                                continue;
 
@@ -300,6 +325,8 @@ static void resolve_data_flow(struct compilation_unit *cu)
                        insert_mov_insns(cu, mappings, nr_mapped, from, to);
                }
        }
+
+       return 0;
 }
 
 int insert_spill_reload_insns(struct compilation_unit *cu)
@@ -321,7 +348,7 @@ int insert_spill_reload_insns(struct compilation_unit *cu)
         * Make sure intervals spilled across basic block boundaries will be
         * reloaded correctly.
         */
-       resolve_data_flow(cu);
+       err = resolve_data_flow(cu);
 
        return err;
 }
diff --git a/jit/statement.c b/jit/statement.c
index 7547b5f..635e0e2 100644
--- a/jit/statement.c
+++ b/jit/statement.c
@@ -69,6 +69,7 @@ void free_statement(struct statement *stmt)
 
 struct tableswitch *alloc_tableswitch(struct tableswitch_info *info,
                                      struct compilation_unit *cu,
+                                     struct basic_block *bb,
                                      unsigned long offset)
 {
        struct tableswitch *table;
@@ -77,6 +78,8 @@ struct tableswitch *alloc_tableswitch(struct tableswitch_info *info,
        if (!table)
                return NULL;
 
+       table->src = bb;
+
        table->low = info->low;
        table->high = info->high;
 
diff --git a/jit/switch-bc.c b/jit/switch-bc.c
index a2a9b84..ce7cd44 100644
--- a/jit/switch-bc.c
+++ b/jit/switch-bc.c
@@ -75,10 +75,6 @@ int convert_tableswitch(struct parse_context *ctx)
        get_tableswitch_info(ctx->code, ctx->offset, &info);
        ctx->buffer->pos += info.insn_size;
 
-       table = alloc_tableswitch(&info, ctx->cu, ctx->offset);
-       if (!table)
-               return -ENOMEM;
-
        default_bb = find_bb(ctx->cu, ctx->offset + info.default_target);
        if (!default_bb)
                goto fail_default_bb;
@@ -109,6 +105,10 @@ int convert_tableswitch(struct parse_context *ctx)
                bb_add_successor(b2, target_bb);
        }
 
+       table = alloc_tableswitch(&info, ctx->cu, b2, ctx->offset);
+       if (!table)
+               return -ENOMEM;
+
        struct statement *if_lesser_stmt;
        struct statement *if_greater_stmt;
        struct statement *stmt;
@@ -143,7 +143,7 @@ int convert_tableswitch(struct parse_context *ctx)
  fail_greater_stmt:
        free_statement(if_lesser_stmt);
  fail_lesser_stmt:
- fail_default_bb:
        free_tableswitch(table);
+ fail_default_bb:
        return -1;
 }
diff --git a/regression/jvm/RegisterAllocatorTortureTest.java b/regression/jvm/RegisterAllocatorTortureTest.java
index 91682a3..8660b40 100644
--- a/regression/jvm/RegisterAllocatorTortureTest.java
+++ b/regression/jvm/RegisterAllocatorTortureTest.java
@@ -77,9 +77,34 @@ public class RegisterAllocatorTortureTest extends TestCase {
         doTestIntervalSplitOnRegisterAllocatorPressure(obj, 0);
     }
 
+    static class DataFlowTestClass {
+        private void test2(int a, int b, int c, int d, int e, int f, int g, int h, int i) {
+            assertEquals(0, a);
+            assertEquals(1, b);
+            assertEquals(2, c);
+            assertEquals(3, d);
+            assertEquals(4, e);
+            assertEquals(5, f);
+            assertEquals(6, g);
+            assertEquals(1, h);
+            assertEquals(7, i);
+        }
+
+        public void test(int a, int b, int c, int d, int e, int f, int g, int h){
+            test2(a, b, c, d, e, f, g, ((f != 0) ? 1 : 0), h);
+        }
+    };
+
+    public static void testDataFlowResolution() {
+        DataFlowTestClass x = new DataFlowTestClass();
+
+        x.test(0, 1, 2, 3, 4, 5, 6, 7);
+    }
+
     public static void main(String[] args) {
       testIntegerBigExpression();
       testComplexRegisterAllocatorPressure();
       testIntervalSplitOnRegisterAllocatorPressure();
+      testDataFlowResolution();
     }
 }
-- 
1.6.0.6

