Motivation: The exception manager will need to translate native instruction pointers to bytecode offsets in order to select the appropriate exception handler from the exception_table.
This solution uses a timestamping method to map bytecode offsets to different intermediate representation entities. Each entity gets a unique timestamp. The greater the timestamp, the later the entity was created. Before and after the conversion of each bytecode, the current timestamp is stored. By comparing timestamps we can easily figure out which offset a given HIR entity belongs to. A similar process is used to map bytecode offsets to LIR entities, provided we have a mapping for the parent HIR entity. With a bytecode offset mapped to each struct insn, it is straightforward to get the mapping for a native instruction pointer. Signed-off-by: Tomek Grabiec <[email protected]> --- Makefile | 3 +- arch/x86/include/arch/instruction.h | 3 + arch/x86/insn-selector_32.brg | 49 ++++++- arch/x86/instruction.c | 2 + include/jit/bcoffset-mapping.h | 35 +++++ include/jit/compilation-unit.h | 7 + include/jit/expression.h | 3 + include/jit/statement.h | 2 + jit/bcoffset-mapping.c | 279 +++++++++++++++++++++++++++++++++++ jit/bytecode-to-ir.c | 12 ++ jit/compilation-unit.c | 3 + jit/expression.c | 2 + jit/spill-reload.c | 2 + jit/statement.c | 1 + test/arch-x86/Makefile | 1 + test/include/arch/instruction.h | 5 + test/jit/Makefile | 1 + vm/jato.c | 1 + 18 files changed, 403 insertions(+), 8 deletions(-) create mode 100644 include/jit/bcoffset-mapping.h create mode 100644 jit/bcoffset-mapping.c diff --git a/Makefile b/Makefile index 9dd15c9..5c318e6 100644 --- a/Makefile +++ b/Makefile @@ -79,7 +79,8 @@ JIT_OBJS = \ jit/typeconv-bc.o \ jit/vtable.o \ jit/fixup-site.o \ - jit/exception.o + jit/exception.o \ + jit/bcoffset-mapping.o VM_OBJS = \ vm/bitset.o \ diff --git a/arch/x86/include/arch/instruction.h b/arch/x86/include/arch/instruction.h index ec5f4cb..4a27d02 100644 --- a/arch/x86/include/arch/instruction.h +++ b/arch/x86/include/arch/instruction.h @@ -120,6 +120,9 @@ struct insn { /* Position of this instruction in LIR. 
*/ unsigned long lir_pos; bool escaped; + + /* Used by bytecode offset mapping */ + unsigned long timestamp; }; static inline unsigned long lir_position(struct use_position *reg) diff --git a/arch/x86/insn-selector_32.brg b/arch/x86/insn-selector_32.brg index 9dcc26c..09d8af3 100644 --- a/arch/x86/insn-selector_32.brg +++ b/arch/x86/insn-selector_32.brg @@ -1299,21 +1299,44 @@ emulate_op_64(struct _MBState *state, struct basic_block *s, void *func, bb_add_insn(s, reg_reg_insn(INSN_MOV_REG_REG, edx, state->reg2)); } -static void emit_code(struct basic_block *bb, MBState *state, int goal) +static int emit_code(struct basic_block *bb, MBState *state, int goal) { + unsigned long begin_timestamp; MBState *kids[2]; int ern; const guint16 *nts; int i; + int err = 0; + bool nomapping; ern = mono_burg_rule(state, goal); nts = mono_burg_nts_data + mono_burg_nts[ern]; mono_burg_kids(state, ern, kids); - for (i = 0; nts[i]; i++) - emit_code(bb, kids[i], nts[i]); + for (i = 0; nts[i]; i++) { + err = emit_code(bb, kids[i], nts[i]); + if (err) + return err; + } + + err = lir_mapping_begin(bb->b_parent, state->tree, &begin_timestamp); + /* If there is no mapping for tree node, then no mapping + for LIR entities should be created. This should not + be indicated as an error because majority of testing + code in test/arch-x86/ does not create mapping. 
*/ + nomapping = (err == -ENOENT); + + if (err && err != -ENOENT) + return err; + else + err = 0; mono_burg_emit(ern, state, state->tree, bb); + + if (!nomapping) + lir_mapping_end(bb->b_parent, begin_timestamp); + + return err; } static void free_state(MBState *state) @@ -1327,18 +1350,25 @@ static void free_state(MBState *state) g_free(state); } -static void insn_select(struct basic_block *bb) +static int insn_select(struct basic_block *bb) { struct statement *stmt; MBState *state; + int err = 0; mono_burg_init(); for_each_stmt(stmt, &bb->stmt_list) { state = mono_burg_label(&stmt->node, bb); - emit_code(bb, state, MB_NTERM_stmt); + + err = emit_code(bb, state, MB_NTERM_stmt); + if (err) + return err; + free_state(state); } + + return err; } int select_instructions(struct compilation_unit *cu) @@ -1365,8 +1395,13 @@ int select_instructions(struct compilation_unit *cu) get_fixed_var(cu, REG_ECX); get_fixed_var(cu, REG_EDX); - for_each_basic_block(bb, &cu->bb_list) - insn_select(bb); + mapping_reset_timestamp(); + + for_each_basic_block(bb, &cu->bb_list) { + err = insn_select(bb); + if (err) + goto out; + } out: return err; diff --git a/arch/x86/instruction.c b/arch/x86/instruction.c index 624651a..c9e1a45 100644 --- a/arch/x86/instruction.c +++ b/arch/x86/instruction.c @@ -25,6 +25,7 @@ */ #include <arch/instruction.h> +#include <jit/bcoffset-mapping.h> #include <stdlib.h> #include <string.h> @@ -37,6 +38,7 @@ struct insn *alloc_insn(enum insn_type type) INIT_LIST_HEAD(&insn->insn_list_node); INIT_LIST_HEAD(&insn->branch_list_node); insn->type = type; + insn->timestamp = next_timestamp(); } return insn; } diff --git a/include/jit/bcoffset-mapping.h b/include/jit/bcoffset-mapping.h new file mode 100644 index 0000000..2352c42 --- /dev/null +++ b/include/jit/bcoffset-mapping.h @@ -0,0 +1,35 @@ +#ifndef _BCOFFSET_MAPPING_ +#define _BCOFFSET_MAPPING_ + +#include <jit/compilation-unit.h> +#include <jit/tree-node.h> +#include <vm/string.h> +#include <limits.h> + +#define 
BC_OFFSET_UNKNOWN ULONG_MAX + +unsigned long next_timestamp(); + +void init_bcoffset_mapping(); +void free_bcoffset_map(struct bcoffset_map_entry *bcoffset_map); + +void mapping_reset_timestamp(); + +int hir_mapping_begin(struct compilation_unit *cu, unsigned long bc_offset, + unsigned long *begin_timestamp); +void hir_mapping_end(struct compilation_unit *cu, + unsigned long begin_timestamp); +int lir_mapping_begin(struct compilation_unit *cu, struct tree_node *node, + unsigned long *begin_timestamp); +void lir_mapping_end(struct compilation_unit *cu, + unsigned long begin_timestamp); + +void print_bcoffset_map(struct bcoffset_map_entry *map, int size); +void bcoffset_print(unsigned long bc_offset, struct string *str); + +unsigned long mapping_get_insn_bcoffset(struct compilation_unit *cu, + struct insn *insn); +unsigned long mapping_get_native_bcoffset(struct compilation_unit *cu, + unsigned char *native_ptr); + +#endif diff --git a/include/jit/compilation-unit.h b/include/jit/compilation-unit.h index 9de65d7..1e63224 100644 --- a/include/jit/compilation-unit.h +++ b/include/jit/compilation-unit.h @@ -13,6 +13,7 @@ #include <pthread.h> struct buffer; +struct bcoffset_map_entry; struct compilation_unit { struct methodblock *method; @@ -33,6 +34,12 @@ struct compilation_unit { /* The stack frame contains information of stack slots for stack-based arguments, local variables, and spill/reload storage. 
*/ struct stack_frame *stack_frame; + + /* Bytecode offset mapping */ + struct bcoffset_map_entry *hir_bcoffset_map; + struct bcoffset_map_entry *lir_bcoffset_map; + int hir_bcoffset_map_size; + int lir_bcoffset_map_size; }; struct compilation_unit *alloc_compilation_unit(struct methodblock *); diff --git a/include/jit/expression.h b/include/jit/expression.h index cba8209..1f683a4 100644 --- a/include/jit/expression.h +++ b/include/jit/expression.h @@ -6,6 +6,7 @@ #include <vm/vm.h> #include <jit/tree-node.h> +#include <jit/bcoffset-mapping.h> #include <arch/instruction.h> @@ -76,6 +77,8 @@ enum unary_operator { struct expression { unsigned long refcount; enum vm_type vm_type; + unsigned long timestamp; + union { struct tree_node node; diff --git a/include/jit/statement.h b/include/jit/statement.h index f87abc0..07cb972 100644 --- a/include/jit/statement.h +++ b/include/jit/statement.h @@ -23,6 +23,8 @@ enum statement_type { }; struct statement { + unsigned long timestamp; + union { struct tree_node node; diff --git a/jit/bcoffset-mapping.c b/jit/bcoffset-mapping.c new file mode 100644 index 0000000..04287de --- /dev/null +++ b/jit/bcoffset-mapping.c @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2009 Tomasz Grabiec + * + * This file is released under the GPL version 2 with the following + * clarification and special exception: + * + * Linking this library statically or dynamically with other modules is + * making a combined work based on this library. Thus, the terms and + * conditions of the GNU General Public License cover the whole + * combination. 
+ * + * As a special exception, the copyright holders of this library give you + * permission to link this library with independent modules to produce an + * executable, regardless of the license terms of these independent + * modules, and to copy and distribute the resulting executable under terms + * of your choice, provided that you also meet, for each linked independent + * module, the terms and conditions of the license of that module. An + * independent module is a module which is not derived from or based on + * this library. If you modify this library, you may extend this exception + * to your version of the library, but you are not obligated to do so. If + * you do not wish to do so, delete this exception statement from your + * version. + * + * Please refer to the file LICENSE for details. + */ + +#include <jit/bcoffset-mapping.h> +#include <jit/statement.h> +#include <jit/expression.h> +#include <jit/tree-printer.h> + +#include <vm/stdlib.h> +#include <vm/string.h> +#include <vm/buffer.h> + +#include <arch/instruction.h> + +#include <pthread.h> +#include <malloc.h> +#include <memory.h> +#include <errno.h> + +pthread_key_t timestamp_key; + +/** + * The meaning of bcoffset maps is that all HIR or LIR entities + * (depends on the map) with timestamp grater than @begin_timestamp + * and less than @end_timestamp are considered to be originated from + * @bc_offset. This information is especially useful when deciding + * which exception handler to call when exception occures. 
+ */ +struct bcoffset_map_entry { + unsigned long bc_offset; + unsigned long begin_timestamp; + unsigned long end_timestamp; +}; + +static void timestamp_descructor(void *value) +{ + free(value); + pthread_setspecific(timestamp_key, NULL); +} + +void init_bcoffset_mapping() +{ + pthread_key_create(&timestamp_key, timestamp_descructor); +} + +void free_bcoffset_map(struct bcoffset_map_entry *bcoffset_map) +{ + if (bcoffset_map) + free(bcoffset_map); +} + +static inline bool timestamp_between(unsigned long begin, unsigned long end, + unsigned long t) +{ + return begin <= t && t < end; +} + +static unsigned long *timestamp_new() +{ + return zalloc(sizeof(unsigned long)); +} + +static unsigned long *current_timestamp() +{ + unsigned long *current; + + current = pthread_getspecific(timestamp_key); + if (current == NULL) { + current = timestamp_new(); + pthread_setspecific(timestamp_key, current); + } + + return current; +} + +static inline unsigned long current_timestamp_value() { + return *current_timestamp(); +} + +unsigned long next_timestamp() +{ + unsigned long *current = current_timestamp(); + + (*current)++; + + return *current; +} + +void mapping_reset_timestamp() +{ + unsigned long *current = current_timestamp(); + + *current = 0; +} + +static int mapping_put(struct bcoffset_map_entry **map_p, + int *bcoffset_map_size_p, + unsigned long bc_offset, + unsigned long begin_timestamp) +{ + struct bcoffset_map_entry *new_map; + struct bcoffset_map_entry *entry; + int new_size; + + new_size = sizeof(struct bcoffset_map_entry) * (*bcoffset_map_size_p + 1); + new_map = realloc(*map_p, new_size); + if (new_map == NULL) + return -ENOMEM; + + entry = &new_map[*bcoffset_map_size_p]; + + entry->bc_offset = bc_offset; + entry->begin_timestamp = begin_timestamp; + entry->end_timestamp = begin_timestamp; + + (*bcoffset_map_size_p)++; + *map_p = new_map; + + return 0; +} + +static void mapping_end(struct bcoffset_map_entry *map, int bcoffset_map_size, + unsigned long begin_timestamp) 
+{ + int i; + + for (i = 0; i < bcoffset_map_size; i++) + if (map[i].begin_timestamp == begin_timestamp) + break; + + assert(map[i].begin_timestamp == begin_timestamp); + + map[i].end_timestamp = next_timestamp(); +} + +static int timestamp_to_bcoffset(struct bcoffset_map_entry *map, int map_size, + unsigned long timestamp, + unsigned long *bc_offset) +{ + int i; + + for (i = 0; i < map_size; i++) { + struct bcoffset_map_entry *entry = &map[i]; + + assert(entry->begin_timestamp < entry->end_timestamp); + + if (timestamp_between(entry->begin_timestamp, + entry->end_timestamp, + timestamp)) { + *bc_offset = entry->bc_offset; + return 0; + } + } + + return -ENOENT; +} + +int hir_mapping_begin(struct compilation_unit *cu, unsigned long bc_offset, + unsigned long *begin_timestamp) +{ + *begin_timestamp = current_timestamp_value(); + + return mapping_put(&cu->hir_bcoffset_map, &cu->hir_bcoffset_map_size, + bc_offset, *begin_timestamp); +} + +void hir_mapping_end(struct compilation_unit *cu, unsigned long begin_timestamp) +{ + mapping_end(cu->hir_bcoffset_map, cu->hir_bcoffset_map_size, + begin_timestamp); +} + +static unsigned long tree_timestamp(struct tree_node *node) +{ + if (node_is_stmt(node)) + return to_stmt(node)->timestamp; + + return to_expr(node)->timestamp; +} + +void print_bcoffset_map(struct bcoffset_map_entry *map, int map_size) +{ + int i; + + for (i = 0; i < map_size; i++) + printf("%ld\t [%ld ; %ld[\n", map[i].bc_offset, + map[i].begin_timestamp, + map[i].end_timestamp); +} + + +int lir_mapping_begin(struct compilation_unit *cu, struct tree_node *node, + unsigned long *begin_timestamp) +{ + unsigned long bc_offset = BC_OFFSET_UNKNOWN; + int err = 0; + + err = timestamp_to_bcoffset(cu->hir_bcoffset_map, + cu->hir_bcoffset_map_size, + tree_timestamp(node), + &bc_offset); + if (err) + return err; + + *begin_timestamp = current_timestamp_value(); + + return mapping_put(&cu->lir_bcoffset_map, &cu->lir_bcoffset_map_size, + bc_offset, *begin_timestamp); +} + 
+void lir_mapping_end(struct compilation_unit *cu, unsigned long begin_timestamp) +{ + mapping_end(cu->lir_bcoffset_map, cu->lir_bcoffset_map_size, + begin_timestamp); +} + +unsigned long mapping_get_insn_bcoffset(struct compilation_unit *cu, + struct insn *insn) +{ + unsigned long bc_offset = BC_OFFSET_UNKNOWN; + + timestamp_to_bcoffset(cu->lir_bcoffset_map, + cu->lir_bcoffset_map_size, + insn->timestamp, + &bc_offset); + + return bc_offset; +} + +unsigned long mapping_get_native_bcoffset(struct compilation_unit *cu, + unsigned char *native_ptr) +{ + unsigned char *method_ptr = buffer_ptr(cu->objcode); + struct basic_block *bb; + struct insn *insn; + + for_each_basic_block(bb, &cu->bb_list) { + for_each_insn(insn, &bb->insn_list) { + if (method_ptr + insn->mach_offset == native_ptr) + return mapping_get_insn_bcoffset(cu, insn); + } + } + + return BC_OFFSET_UNKNOWN; +} + +void bcoffset_print(unsigned long bc_offset, struct string *str) +{ + if (bc_offset == BC_OFFSET_UNKNOWN) + str_append(str, "?"); + else { + static char buf[32]; + sprintf(buf, "%ld", bc_offset); + str_append(str, buf); + } +} diff --git a/jit/bytecode-to-ir.c b/jit/bytecode-to-ir.c index 8680efa..ba3599e 100644 --- a/jit/bytecode-to-ir.c +++ b/jit/bytecode-to-ir.c @@ -11,6 +11,7 @@ #include <jit/compiler.h> #include <jit/statement.h> #include <jit/expression.h> +#include <jit/bcoffset-mapping.h> #include <vm/bytecode.h> #include <vm/bytecodes.h> @@ -296,10 +297,21 @@ int convert_to_ir(struct compilation_unit *cu) }; int err = 0; + mapping_reset_timestamp(); + while (ctx.offset < ctx.code_size) { + unsigned long begin_timestamp; + + err = hir_mapping_begin(cu, ctx.offset, &begin_timestamp); + if (err) + break; + err = parse_bytecode_insn(&ctx); if (err) break; + + hir_mapping_end(cu, begin_timestamp); } + return err; } diff --git a/jit/compilation-unit.c b/jit/compilation-unit.c index 0848f69..2ba6a0a 100644 --- a/jit/compilation-unit.c +++ b/jit/compilation-unit.c @@ -28,6 +28,7 @@ #include 
<jit/compilation-unit.h> #include <jit/stack-slot.h> #include <jit/vars.h> +#include <jit/bcoffset-mapping.h> #include <vm/buffer.h> #include <vm/vm.h> @@ -87,6 +88,8 @@ void free_compilation_unit(struct compilation_unit *cu) free_buffer(cu->objcode); free_var_infos(cu->var_infos); free_stack_frame(cu->stack_frame); + free_bcoffset_map(cu->hir_bcoffset_map); + free_bcoffset_map(cu->lir_bcoffset_map); free(cu); } diff --git a/jit/expression.c b/jit/expression.c index 8a37502..a299e93 100644 --- a/jit/expression.c +++ b/jit/expression.c @@ -6,6 +6,7 @@ */ #include <jit/expression.h> +#include <jit/bcoffset-mapping.h> #include <vm/vm.h> #include <vm/method.h> #include <stdlib.h> @@ -21,6 +22,7 @@ struct expression *alloc_expression(enum expression_type type, expr->node.op = type << EXPR_TYPE_SHIFT; expr->vm_type = vm_type; expr->refcount = 1; + expr->timestamp = next_timestamp(); } return expr; } diff --git a/jit/spill-reload.c b/jit/spill-reload.c index 14e1758..bcb91a4 100644 --- a/jit/spill-reload.c +++ b/jit/spill-reload.c @@ -63,6 +63,7 @@ static int insert_spill_insn(struct live_interval *interval, struct compilation_ interval->spill_slot = slot; + spill->timestamp = last->timestamp; list_add(&spill->insn_list_node, &last->insn_list_node); return 0; @@ -89,6 +90,7 @@ static int insert_reload_insn(struct live_interval *interval, struct compilation if (!reload) return -ENOMEM; + reload->timestamp = first->timestamp; list_add_tail(&reload->insn_list_node, &first->insn_list_node); return 0; diff --git a/jit/statement.c b/jit/statement.c index 0c8ecf4..1a81dd1 100644 --- a/jit/statement.c +++ b/jit/statement.c @@ -19,6 +19,7 @@ struct statement *alloc_statement(enum statement_type type) memset(stmt, 0, sizeof *stmt); INIT_LIST_HEAD(&stmt->stmt_list_node); stmt->node.op = type << STMT_TYPE_SHIFT; + stmt->timestamp = next_timestamp(); } return stmt; diff --git a/test/arch-x86/Makefile b/test/arch-x86/Makefile index 5014de5..60ff6f5 100644 --- a/test/arch-x86/Makefile 
+++ b/test/arch-x86/Makefile @@ -43,6 +43,7 @@ OBJS = \ ../../jit/statement.o \ ../../jit/tree-printer.o \ ../../jit/fixup-site.o \ + ../../jit/bcoffset-mapping.o \ ../../arch/x86/emit-code$(ARCH_POSTFIX).o \ ../../arch/x86/instruction.o \ ../../arch/x86/insn-selector$(ARCH_POSTFIX).o \ diff --git a/test/include/arch/instruction.h b/test/include/arch/instruction.h index d9776ca..0d2c81d 100644 --- a/test/include/arch/instruction.h +++ b/test/include/arch/instruction.h @@ -52,8 +52,13 @@ struct insn { struct operand operand; }; struct list_head insn_list_node; + /* Offset in machine code. */ + unsigned long mach_offset; /* Position of this instruction in LIR. */ unsigned long lir_pos; + + /* Used by bytecode offset mapping */ + unsigned long timestamp; }; static inline unsigned long lir_position(struct use_position *reg) diff --git a/test/jit/Makefile b/test/jit/Makefile index 1fb21d9..04df5d4 100644 --- a/test/jit/Makefile +++ b/test/jit/Makefile @@ -34,6 +34,7 @@ OBJS = \ ../../jit/tree-printer.o \ ../../jit/args.o \ ../../jit/exception.o \ + ../../jit/bcoffset-mapping.o \ ../libharness/libharness.o \ ../jamvm/alloc-stub.o \ ../jamvm/resolve-stub.o \ diff --git a/vm/jato.c b/vm/jato.c index 8598648..710052a 100644 --- a/vm/jato.c +++ b/vm/jato.c @@ -293,6 +297,7 @@ int main(int argc, char *argv[]) { exe_name = argv[0]; setup_signal_handlers(); + init_bcoffset_mapping(); setDefaultInitArgs(&args); int class_arg = parseCommandLine(argc, argv, &args); -- 1.6.0.6 ------------------------------------------------------------------------------ Register Now & Save for Velocity, the Web Performance & Operations Conference from O'Reilly Media. Velocity features a full day of expert-led, hands-on workshops and two days of sessions from industry leaders in dedicated Performance & Operations tracks. Use code vel09scf and Save an extra 15% before 5/3. 
http://p.sf.net/sfu/velocityconf _______________________________________________ Jatovm-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/jatovm-devel
