Author: jisom Date: Sat Aug 4 16:02:34 2007 New Revision: 20481 Modified: trunk/src/jit/amd64/core.jit trunk/src/jit/amd64/jit_emit.h
Log: [jit/amd64] Add support for some mapped ints, not much yet but groundwork's important. Some code cleanup. Accidental breakage with a static parrot(with some tests, not all). It's not enabled by default anyway. Modified: trunk/src/jit/amd64/core.jit ============================================================================== --- trunk/src/jit/amd64/core.jit (original) +++ trunk/src/jit/amd64/core.jit Sat Aug 4 16:02:34 2007 @@ -1,5 +1,5 @@ ; -; i386/core.jit +; amd64/core.jit ; ; $Id$ ; @@ -8,6 +8,76 @@ jit_emit_end(NATIVECODE); } +Parrot_noop { + emit_nop(NATIVECODE); +} + +TEMPLATE Parrot_set_x_x { + if (MAP[1] && MAP[2]) { + emit_mov_r_r(NATIVECODE, MAP[1], MAP[2]); + } + else if (MAP[1]) { + emit_mov_r_mr(NATIVECODE, MAP[1], RBX, ROFFS_INT(2)); + } + else if (MAP[2]) { + emit_mov_mr_r(NATIVECODE, RBX, ROFFS_INT(1), MAP[2]); + } + else { + emit_mov_r_mr(NATIVECODE, ISR1, RBX, ROFFS_INT(2)); + emit_mov_mr_r(NATIVECODE, RBX, ROFFS_INT(1), ISR1); + } +} + + +Parrot_set_i_i { + Parrot_set_x_x s/<_N>/_i/ +} + + +Parrot_set_i_ic { + if (MAP[1]) { + emit_mov_r_i(NATIVECODE, MAP[1], *INT_CONST[2]); + } + else { + emit_mov_mr_i(NATIVECODE, RBX, (long)ROFFS_INT(1), *INT_CONST[2]); + } +} + +TEMPLATE Parrot_binop_x_x { + if (MAP[1] && MAP[2]) { + emit_<op>_r_r(NATIVECODE, MAP[1], MAP[2]); + } + else if (MAP[1]) { + emit_<op>_r_mr(NATIVECODE, MAP[1], RBX, ROFFS_INT(2)); + } + else if (MAP[2]) { +# ifdef emit_<op>_mr_r + emit_<op>_mr_r(NATIVECODE, RBX, ROFFS_INT(1), MAP[2]); +# else + emit_mov_r_mr(NATIVECODE, ISR1, RBX, ROFFS_INT(1)); + emit_<op>_r_r(NATIVECODE, ISR1, MAP[2]); + emit_mov_mr_r(NATIVECODE, RBX, ROFFS_INT(1), ISR1); +# endif + } + else { + emit_mov_r_mr(NATIVECODE, ISR1, RBX, ROFFS_INT(1)); + emit_<op>_r_mr(NATIVECODE, ISR1, RBX, ROFFS_INT(2)); + emit_mov_mr_r(NATIVECODE, RBX, ROFFS_INT(1), ISR1); + } +} + +Parrot_add_i_i { + Parrot_binop_x_x s/<op>/add/ +} + +Parrot_sub { + Parrot_binop_x_x s/<op>/sub/ +} + + + + + /* * Local variables: * c-indentation-style: bsd Modified: trunk/src/jit/amd64/jit_emit.h ============================================================================== --- trunk/src/jit/amd64/jit_emit.h (original) +++ trunk/src/jit/amd64/jit_emit.h Sat Aug 4 16:02:34 2007 @@ -74,6 +74,11 @@ void Parrot_jit_begin(Parrot_jit_info_t *, Interp *); +/* This is used for testing whether or not keeping these two in registers is an + * improvement or not. This file may need to be expanded further to know for + * sure. */ +# undef USE_OP_MAP_AND_CODE_START + /* * define all the available cpu registers @@ -83,7 +88,7 @@ RAX, /* return values */ RCX, RDX, - RBX, + RBX, /* parrot base pointer */ RSP, /* stack pointer */ RBP, /* base pointer */ RSI, @@ -96,13 +101,18 @@ ISR1 = R11, R12, R13, +#ifdef USE_OP_MAP_AND_CODE_START CODE_START = R13, /* Don't forget, there are very weird pecularities */ +#endif R14, +#ifdef USE_OP_MAP_AND_CODE_START OP_MAP = R14, +#endif R15, INTERP = R15 } amd64_iregister_t; + /* * If your arch doesn't have that much register available, you * don't cache OP_MAP, CODE_START, and INTERP in a cpu register. @@ -114,7 +124,7 @@ * define, which register is the register base pointer */ -#define Parrot_jit_emit_get_base_reg_no(pc) RBP +#define Parrot_jit_emit_get_base_reg_no(pc) RBX /* * define floating point register too, if there are some @@ -170,7 +180,7 @@ *(pc++) = (char)((scale << 6) | ((index & 7) << 3) | (base & 7)); } /* 0xXX +rq */ -# define emit_op_r(op, rexreq, pc, reg) do { \ +# define emit_op_r(op, rexreq, pc, reg) { \ if (rexreq) { \ *(pc++) = (char)(0x48 | (((reg) & 8) >> 3)); \ } \ @@ -178,16 +188,19 @@ *(pc++) = (char)(0x40 | (((reg) & 8) >> 3)); \ } \ *(pc++) = (char)((op) | ((reg) & 7)); \ - } while (0) + } /* 0xXX /r */ -# define emit_op_r_r(op, pc, dst, src, disp) do { \ +# define emit_op_r_r(op, pc, dst, src) { \ emit_REX(pc, dst, src); \ *(pc++) = (char) op; \ - if (disp == 0) { \ - emit_modrm(pc, b11, dst, src); \ - } \ - else if (is8bit(disp)) { \ + emit_modrm(pc, b11, dst, src); \ + } + +# define emit_op_r_mr(op, pc, dst, src, disp) { \ + emit_REX(pc, dst, src); \ + *(pc++) = (char) op; \ + if (is8bit(disp)) { \ emit_modrm(pc, b01, dst, src); \ *(pc++) = (char)disp; \ } \ @@ -196,16 +209,16 @@ *(int *)pc = (int)disp; \ pc += 4; \ } \ - } while (0) + } -# define emit_op_i(op, pc, imm) do { \ +# define emit_op_i(op, pc, imm) { \ *(pc++) = (char)(op); \ - *(int *)pc = (int)imm; \ + *(int *)pc = (int)(imm); \ pc += 4; \ - } while (0) + } -# define emit_op_r_i(pc, op, op2, code, dst, imm) do { \ +# define emit_op_r_i(pc, op, op2, code, dst, imm) { \ emit_REX(pc, dst, 0x0); \ if (is8bit(imm)) { \ *(pc++) = (char) op; \ @@ -218,64 +231,110 @@ *(int *)pc = (int)imm; \ pc += 4; \ } \ - } while (0) + } # define emit_add_r_i(pc, dst, imm) emit_op_r_i(pc, 0x83, 0x81, 0x0, dst, imm) +# define emit_add_r_r(pc, dst, src) emit_op_r_r(0x03, pc, dst, src) +# define emit_add_r_mr(pc, dst, src, disp) emit_op_r_mr(0x03, pc, dst, src, disp) +# define emit_add_mr_r(pc, dst, disp, src) emit_op_r_mr(0x01, pc, src, dst, disp) + +# define emit_sub_r_i(pc, dst, imm) emit_op_r_i(pc, 0x83, 0x81, 0x5, dst, imm) +# define emit_sub_r_r(pc, dst, src) emit_op_r_r(0x2b, pc, dst, src) +# define emit_sub_r_mr(pc, dst, src, disp) emit_op_r_mr(0x29, pc, dst, src, disp) +# define emit_sub_mr_r(pc, dst, disp, src) emit_op_r_mr(0x2b, pc, src, dst, disp) -# define emit_call_i(pc, imm) emit_op_i(0xe8, pc, (imm) - (long)(pc) - 4) +/* XXX emit_call_i is slightly broken I think, stick to a shared parrot */ +# define emit_call_i(pc, imm) emit_op_i(0xe8, (pc), (long)(imm) - (long)(pc) - 4) # define emit_call_r(pc, reg) { \ emit_REX(pc, 0x0, reg); \ *(pc)++ = (char)0xff; \ emit_modrm(pc, b11, 0x2, reg); } + +# define emit_jump_r_r(pc, reg1, reg2) { \ + emit_REX(jit_info->native_ptr, reg1, reg2); \ + *(jit_info->native_ptr++) = (char)0xff; \ + emit_modrm(jit_info->native_ptr, b00, 0x4, b100); \ + emit_sib(jit_info->native_ptr, b00, reg1, reg2); \ +} + # define emit_leave(pc) *(pc++) = (char)0xc9; # define emit_ret(pc) *(pc++) = (char)0xc3; # define emit_mov_r_r(pc, dst, src) \ - emit_op_r_r(0x8B, pc, dst, src, 0) + emit_op_r_r(0x8B, pc, dst, src) + +/* mov [reg + offs], imm */ +# define emit_mov_mr_i(pc, reg, offs, imm) {\ + if (is32bit(imm)) { \ + emit_REX(pc, 0x0, reg); \ + *(pc++) = (char) 0xc7; \ + if (is8bit(offs)) { \ + emit_modrm(pc, b01, 0x0, reg); \ + *(pc++) = (char)offs; \ + } \ + else { \ + emit_modrm(pc, b10, 0x0, reg); \ + *(int *)pc = (int)offs; \ + pc += 4; \ + } \ + *(int *)pc = (int)(imm); \ + pc += 4; \ + } \ + else { \ + emit_mov_r_i(pc, ISR1, imm); \ + emit_mov_mr_r(pc, reg, offs, ISR1); \ + } \ + } -# define emit_mov_r_i(pc, reg, imm) do {\ +/* mov reg, imm */ +# define emit_mov_r_i(pc, reg, imm) {\ emit_op_r(0xb8, 1, pc, reg); \ *(long *)pc = (long)(imm); \ pc += 8; \ - } while (0) - -# define emit_sub_r_r(pc, dst, src) \ - emit_op_r_r(0x29, pc, src, dst, 0) + } +/* push reg */ # define emit_push_r(pc, reg) emit_op_r(0x50, 0, pc, reg) +/* pop reg */ # define emit_pop_r(pc, reg) emit_op_r(0x58, 0, pc, reg) +/* push imm */ # define emit_push_i(pc, imm) emit_op_i(0x68, pc, imm) -# define is8bit(c) ((c) >= -128 && (c) <= 127) +/* did you know, that (unsigned)0 is not an 8 bit value? */ +# define is8bit(c) (((long)c) >= -128 && ((long)c) <= 127) +# define is32bit(c) (((long)c) >= -2147483648 && ((long)c) <= 2147483647) # define emit_get_int_from_stack(pc, dst, disp) \ emit_mov_r_mr(pc, dst, RBP, disp) # define emit_send_int_to_stack(pc, src, disp) \ - emit_mov_mr_r(pc, RBP, src, disp) + emit_mov_mr_r(pc, RBP, disp, src) +/* mov dst, [src + disp] */ # define emit_mov_r_mr(pc, dst, src, disp) \ - emit_op_r_r(0x8b, pc, dst, src, disp) -# define emit_mov_mr_r(pc, dst, src, disp) \ - emit_op_r_r(0x89, pc, src, dst, disp) + emit_op_r_mr(0x8b, pc, dst, src, disp) +/* mov [dst + disp], src */ +# define emit_mov_mr_r(pc, dst, disp, src) \ + emit_op_r_mr(0x89, pc, src, dst, disp) +/* lea dst, [src + disp] */ # define emit_lea_r_mr(pc, dst, src, disp) \ - emit_op_r_r(0x8d, pc, src, dst, disp) + emit_op_r_mr(0x8d, pc, src, dst, disp) /* push rbp * mov rbp, rsp */ /* move rsp to rbp; set rbp to rsp */ -# define jit_emit_stack_frame_enter(pc) do { \ +# define jit_emit_stack_frame_enter(pc) { \ emit_push_r(pc, RBP); \ emit_mov_r_r(pc, RBP, RSP); \ - } while (0) + } /* pop rbp */ -# define jit_emit_stack_frame_leave(pc) do { \ +# define jit_emit_stack_frame_leave(pc) { \ emit_pop_r(pc, RBP); \ - } while (0) + } # define emit_jcc(pc, code, disp) { \ if (is8bit(disp)) { \ @@ -288,7 +347,7 @@ *(int *)pc = (int)disp; \ pc += 4; \ } \ -} while (0) +} typedef enum { jcc_jo, /* Jump if overflow */ @@ -324,7 +383,7 @@ } amd64_jcc_t; # define emit_test_r_r(pc, dst, src) \ - emit_op_r_r(0x85, pc, src, dst, 0) + emit_op_r_r(0x85, pc, src, dst) /* pop r15 * pop r14 @@ -346,6 +405,7 @@ #ifdef JIT_EMIT +#ifdef USE_OP_MAP_AND_CODE_START /* These two can be mixed together just like in the i386 jit. All the places I * can see this being called require it to be included, but for the moment I'm * keeping it as these macros. */ @@ -354,10 +414,10 @@ * and sets the OP_MAP register */ # define jit_emit_load_op_map(pc) { \ - emit_mov_r_mr(jit_info->native_ptr, OP_MAP, INTERP, (long)offsetof(Interp, code)); \ - emit_mov_r_mr(jit_info->native_ptr, OP_MAP, OP_MAP, (long)offsetof(PackFile_ByteCode, jit_info)); \ - emit_lea_r_mr(jit_info->native_ptr, OP_MAP, OP_MAP, (long)offsetof(Parrot_jit_info_t, arena)); \ - emit_mov_r_mr(jit_info->native_ptr, OP_MAP, OP_MAP, (long)offsetof(Parrot_jit_arena_t, op_map)); \ + emit_mov_r_mr(pc, OP_MAP, INTERP, (long)offsetof(Interp, code)); \ + emit_mov_r_mr(pc, OP_MAP, OP_MAP, (long)offsetof(PackFile_ByteCode, jit_info)); \ + emit_lea_r_mr(pc, OP_MAP, OP_MAP, (long)offsetof(Parrot_jit_info_t, arena)); \ + emit_mov_r_mr(pc, OP_MAP, OP_MAP, (long)offsetof(Parrot_jit_arena_t, op_map)); \ } /* @@ -365,16 +425,17 @@ * and sets the CODE_START register */ # define jit_emit_load_code_start(pc) { \ - emit_mov_r_mr(jit_info->native_ptr, CODE_START, INTERP, (long)offsetof(Interp, code)); \ - emit_mov_r_mr(jit_info->native_ptr, CODE_START, CODE_START, (long)offsetof(PackFile_Segment, data)); \ + emit_mov_r_mr(pc, CODE_START, INTERP, (long)offsetof(Interp, code)); \ + emit_mov_r_mr(pc, CODE_START, CODE_START, (long)offsetof(PackFile_Segment, data)); \ } +#endif /* USE_OP_MAP_AND_CODE_START */ + /* * emit code that calls a Parrot opcode function */ static void call_func(Parrot_jit_info_t *jit_info, void *addr) { - /* Despite what gcc says, I'm not afraid of the following line */ - if ((long)addr > INT_MAX) { + if ((long)addr > (long)INT_MAX) { /* Move the address into our scratch register R11 * We cannot use just the immediate form of call because the address * will be too large if we're using a shared parrot, but will be ok on @@ -393,6 +454,41 @@ } } +/* Jump to RAX, which needs to be set before calling this */ +static void +Parrot_emit_jump_to_rax(Parrot_jit_info_t *jit_info, Interp *interp) +{ + if (!jit_info->objfile) { +#ifdef USE_OP_MAP_AND_CODE_START + /* Get interp->code->base.data */ + jit_emit_load_code_start(jit_info->native_ptr); + emit_sub_r_r(jit_info->native_ptr, RAX, CODE_START); + + /* Get interp->code->jit_info->arena->op_map */ + jit_emit_load_op_map(jit_info->native_ptr); +#else + /* emit code that gets interp->code->base.data */ + emit_mov_r_mr(jit_info->native_ptr, RCX, INTERP, (long)offsetof(Interp, code)); + emit_mov_r_mr(jit_info->native_ptr, RDX, RCX, (long)offsetof(PackFile_Segment, data)); + emit_sub_r_r(jit_info->native_ptr, RAX, RDX); + + /* Reuse interp->code in RCX, get interp->code->jit_info->arena->op_map */ + emit_mov_r_mr(jit_info->native_ptr, RDX, RCX, (long)offsetof(PackFile_ByteCode, jit_info)); + emit_lea_r_mr(jit_info->native_ptr, RDX, RDX, (long)offsetof(Parrot_jit_info_t, arena)); + emit_mov_r_mr(jit_info->native_ptr, RDX, RDX, (long)offsetof(Parrot_jit_arena_t, op_map)); +#endif + } + /* Base pointer */ + emit_mov_r_mr(jit_info->native_ptr, RBX, INTERP, (long)offsetof(Interp, ctx.bp)); + +#ifdef USE_OP_MAP_AND_CODE_START + emit_jump_r_r(jit_info->native_ptr, RAX, OP_MAP); +#else + emit_jump_r_r(jit_info->native_ptr, RAX, RDX); +#endif +} + + #endif /* JIT_EMIT */ #if JIT_EMIT == 2 @@ -430,7 +526,6 @@ call_func(jit_info, (void (*)(void))interp->op_func_table[cur_op]); - } /* @@ -443,21 +538,7 @@ Interp *interp) { Parrot_jit_normal_op(jit_info, interp); - /* Parrot_emit_jump_to_eax */ - if (!jit_info->objfile) { - /* Get interp->code->base.data */ - jit_emit_load_code_start(jit_info->native_ptr); - emit_sub_r_r(jit_info->native_ptr, RAX, CODE_START); - - /* Get interp->code->jit_info->arena->op_map */ - jit_emit_load_op_map(jit_info->native_ptr); - } - - emit_REX(jit_info->native_ptr, 0x0, OP_MAP); - *(jit_info->native_ptr++) = (char)0xff; - emit_modrm(jit_info->native_ptr, b00, 0x4, RSP); - emit_sib(jit_info->native_ptr, b00, RAX, OP_MAP); - /* END Parrot_emit_jump_to_eax */ + Parrot_emit_jump_to_rax(jit_info, interp); } /* @@ -481,32 +562,19 @@ /* Quick fixup, but we know it's 12, anyway it needs to be a byte */ emit_jcc(jit_info->native_ptr, jcc_jnz, 0x00); - sav_ptr = (long)jit_info->native_ptr - 1; + sav_ptr = (void *)(jit_info->native_ptr - 1); Parrot_end_jit(jit_info, interp); *sav_ptr = (char)(jit_info->native_ptr - sav_ptr - 1); - /* Parrot_emit_jump_to_eax */ - if (!jit_info->objfile) { - /* Get interp->code->base.data */ - jit_emit_load_code_start(jit_info->native_ptr); - emit_sub_r_r(jit_info->native_ptr, RAX, CODE_START); - - /* Get interp->code->jit_info->arena->op_map */ - jit_emit_load_op_map(jit_info->native_ptr); - } - - emit_REX(jit_info->native_ptr, 0x0, OP_MAP); - *(jit_info->native_ptr++) = (char)0xff; - emit_modrm(jit_info->native_ptr, b00, 0x4, RSP); - emit_sib(jit_info->native_ptr, b00, RAX, OP_MAP); - /* END Parrot_emit_jump_to_eax */ - + Parrot_emit_jump_to_rax(jit_info, interp); } #endif /* JIT_EMIT == 2 */ #if JIT_EMIT == 0 +# define REQUIRES_CONSTANT_POOL 0 + /* * emit stack frame according to ABI * preserve mapped registers according to ABI @@ -544,22 +612,7 @@ emit_mov_r_r(jit_info->native_ptr, R15, RDI); emit_mov_r_r(jit_info->native_ptr, RAX, RSI); - - /* Parrot_emit_jump_to_eax */ - if (!jit_info->objfile) { - /* Get interp->code->base.data */ - jit_emit_load_code_start(jit_info->native_ptr); - emit_sub_r_r(jit_info->native_ptr, RAX, CODE_START); - - /* Get interp->code->jit_info->arena->op_map */ - jit_emit_load_op_map(jit_info->native_ptr); - } - - emit_REX(jit_info->native_ptr, 0x0, OP_MAP); - *(jit_info->native_ptr++) = (char)0xff; - emit_modrm(jit_info->native_ptr, b00, 0x4, RSP); - emit_sib(jit_info->native_ptr, b00, RAX, OP_MAP); - /* END Parrot_emit_jump_to_eax */ + Parrot_emit_jump_to_rax(jit_info, interp); } @@ -590,32 +643,32 @@ /* set mem to reg */ static void -jit_mov_mr_n_offs(Parrot_jit_info_t *jit_info, +jit_mov_mr_n_offs(Interp *interp, Parrot_jit_info_t *jit_info, int base_reg, INTVAL offs, int src_reg) { emit_64nop(jit_info->native_ptr); } static void -jit_mov_mr_offs(Parrot_jit_info_t *jit_info, +jit_mov_mr_offs(Interp *interp, Parrot_jit_info_t *jit_info, int base_reg, INTVAL offs, int src_reg) { - emit_get_int_from_stack(jit_info->native_ptr, src_reg, offs); + emit_mov_mr_r(jit_info->native_ptr, base_reg, offs, src_reg); } /* set reg to mem */ static void -jit_mov_rm_n_offs(Parrot_jit_info_t *jit_info, +jit_mov_rm_n_offs(Interp *interp, Parrot_jit_info_t *jit_info, int dst_reg, int base_reg, INTVAL offs) { emit_64nop(jit_info->native_ptr); } static void -jit_mov_rm_offs(Parrot_jit_info_t *jit_info, +jit_mov_rm_offs(Interp *interp, Parrot_jit_info_t *jit_info, int dst_reg, int base_reg, INTVAL offs) { - emit_send_int_to_stack(jit_info->native_ptr, dst_reg, offs); + emit_mov_r_mr(jit_info->native_ptr, dst_reg, base_reg, offs); } /* @@ -634,10 +687,26 @@ static const char intval_map[INT_REGISTERS_TO_MAP] = { - /* Preserved */ - RBP, RBX, R12, R13, R14, R15, + + /* Preserved, we'd have more, but keeping code_start, op_map, interp, + * and the base pointer in registers takes away four, not to mention + * RBP which is used for easier debugging. That's five registers used + * for one reason or another at the moment. I'm not sure if it's worth + * it yet. */ + /* + * RBX for Interp->ctx.bp + * RBP for debugging, can add it to the preserved list + * R12 + * R13 for CODE_START + * R14 for OP_MAP + * R15 for INTERP + */ + R12, +#ifndef USE_OP_MAP_AND_CODE_START + R13, R14, +#endif /* Unpreserved */ - RAX, RCX, RDX, RSP, RSI, RDI, R8, R9, R10, R11, + RSI, RDI, R8, R9, R10, RCX, RDX }; static const char floatval_map[FLOAT_REGISTERS_TO_MAP] = @@ -652,8 +721,10 @@ */ static const jit_arch_info arch_info = { + /* CPU <- Parrot reg move functions */ jit_mov_rm_offs, jit_mov_rm_n_offs, + /* Parrot <- CPU reg move functions */ jit_mov_mr_offs, jit_mov_mr_n_offs, Parrot_jit_dofixup, @@ -662,31 +733,36 @@ /* JIT_CODE_FILE */ { Parrot_jit_begin, /* emit code prologue */ - 0, - 0, - intval_map, - 0, /* mapped float regs */ - 0, /* all preserved */ - floatval_map - }, +#ifdef USE_OP_MAP_AND_CODE_START + 8, + 1, +#else + 10, /* mapped int */ + 3, /* preserved int */ +#endif + intval_map, /* which ints mapped */ + 0, /* mapped float */ + 0, /* preserved float */ + floatval_map /* which floats mapped */ + }, /* JIT_CODE_SUB */ { - Parrot_jit_begin_sub, /* emit code prologue */ - 0, /* 7 mapped ints */ - 0, /* all volatile */ + Parrot_jit_begin_sub, + 9, + 8, intval_map, - 0, /* mapped float regs */ - 0, /* all volatile */ + 0, + 0, floatval_map }, /* JIT_CODE_SUB_REGS_ONLY */ { /*Parrot_jit_begin_sub_regs*/0, /* emit code prologue */ - 16, /* 7 mapped ints */ - 6, /* all volatile */ + 16, + 6, intval_map, - 0, /* 12 mapped float regs */ - 0, /* all volatile */ + 0, + 0, floatval_map } }