Author: jisom Date: Sun Aug 5 13:06:58 2007 New Revision: 20498 Modified: trunk/src/jit/amd64/core.jit trunk/src/jit/amd64/jit_emit.h
Log: [jit/amd64] Mapped floats now Modified: trunk/src/jit/amd64/core.jit ============================================================================== --- trunk/src/jit/amd64/core.jit (original) +++ trunk/src/jit/amd64/core.jit Sun Aug 5 13:06:58 2007 @@ -14,26 +14,25 @@ TEMPLATE Parrot_set_x_x { if (MAP[1] && MAP[2]) { - emit_mov_r_r(NATIVECODE, MAP[1], MAP[2]); + emit_mov_<R>_<R>(NATIVECODE, MAP[1], MAP[2]); } else if (MAP[1]) { - emit_mov_r_mr(NATIVECODE, MAP[1], RBX, ROFFS_INT(2)); + emit_mov_<R>_m<R>(NATIVECODE, MAP[1], RBX, ROFFS_INT(2)); } else if (MAP[2]) { - emit_mov_mr_r(NATIVECODE, RBX, ROFFS_INT(1), MAP[2]); + emit_mov_m<R>_<R>(NATIVECODE, RBX, ROFFS_INT(1), MAP[2]); } else { - emit_mov_r_mr(NATIVECODE, ISR1, RBX, ROFFS_INT(2)); - emit_mov_mr_r(NATIVECODE, RBX, ROFFS_INT(1), ISR1); + emit_mov_<R>_m<R>(NATIVECODE, ISR1, RBX, ROFFS_INT(2)); + emit_mov_m<R>_<R>(NATIVECODE, RBX, ROFFS_INT(1), ISR1); } } Parrot_set_i_i { - Parrot_set_x_x s/<_N>/_i/ + Parrot_set_x_x s/<R>/r/ } - Parrot_set_i_ic { if (MAP[1]) { emit_mov_r_i(NATIVECODE, MAP[1], *INT_CONST[2]); @@ -43,35 +42,92 @@ } } +Parrot_set_n_n { + Parrot_set_x_x s/<R>/x/ s/ISR/FSR/ s/INT/NUM/ +} + +Parrot_set_n_ic { + if (MAP[1]) { + emit_mov_r_i(NATIVECODE, ISR1, *INT_CONST[2]); + emit_cvtsi2sd_x_r(NATIVECODE, MAP[1], ISR1); + } + else { + emit_mov_r_i(NATIVECODE, ISR1, *INT_CONST[2]); + emit_cvtsi2sd_x_r(NATIVECODE, FSR1, ISR1); + emit_mov_mx_x(NATIVECODE, RBX, ROFFS_NUM(1), FSR1); + } +} + +Parrot_set_n_nc { + if (MAP[1]) { + /* I want something like this + * emit_mov_x_mx(NATIVECODE, MAP[1], RBX, ROFFS_NUM(2)); + * but this works, even if ugly + * It's either this or dereferencing + * ((interp->ctx).state)->constants[cur_opcode[2]]->u.number + */ + emit_mov_r_i(NATIVECODE, ISR1, 0); + emit_mov_x_mx(NATIVECODE, MAP[1], ISR1, &NUM_CONST[2]); + } + else { + /* unmapped floats is screwy at the moment, but we get 15 plus a + * scratch */ + emit_mov_r_i(NATIVECODE, ISR1, 0); + emit_mov_x_mx(NATIVECODE, FSR1, ISR1, &NUM_CONST[2]); + emit_mov_mx_x(NATIVECODE, RBX, ROFFS_NUM(1), FSR1); + } +} + + TEMPLATE Parrot_binop_x_x { if (MAP[1] && MAP[2]) { - emit_<op>_r_r(NATIVECODE, MAP[1], MAP[2]); + emit_<op>_<R>_<R>(NATIVECODE, MAP[1], MAP[2]); } else if (MAP[1]) { - emit_<op>_r_mr(NATIVECODE, MAP[1], RBX, ROFFS_INT(2)); + emit_<op>_<R>_m<R>(NATIVECODE, MAP[1], RBX, ROFFS_INT(2)); } else if (MAP[2]) { -# ifdef emit_<op>_mr_r - emit_<op>_mr_r(NATIVECODE, RBX, ROFFS_INT(1), MAP[2]); +# ifdef emit_<op>_m<R>_<R> + emit_<op>_m<R>_<R>(NATIVECODE, RBX, ROFFS_INT(1), MAP[2]); # else - emit_mov_r_mr(NATIVECODE, ISR1, RBX, ROFFS_INT(1)); - emit_<op>_r_r(NATIVECODE, ISR1, MAP[2]); - emit_mov_mr_r(NATIVECODE, RBX, ROFFS_INT(1), ISR1); + emit_mov_<R>_m<R>(NATIVECODE, ISR1, RBX, ROFFS_INT(1)); + emit_<op>_<R>_<R>(NATIVECODE, ISR1, MAP[2]); + emit_mov_m<R>_<R>(NATIVECODE, RBX, ROFFS_INT(1), ISR1); # endif } else { - emit_mov_r_mr(NATIVECODE, ISR1, RBX, ROFFS_INT(1)); - emit_<op>_r_mr(NATIVECODE, ISR1, RBX, ROFFS_INT(2)); - emit_mov_mr_r(NATIVECODE, RBX, ROFFS_INT(1), ISR1); + emit_mov_<R>_m<R>(NATIVECODE, ISR1, RBX, ROFFS_INT(1)); + emit_<op>_<R>_m<R>(NATIVECODE, ISR1, RBX, ROFFS_INT(2)); + emit_mov_m<R>_<R>(NATIVECODE, RBX, ROFFS_INT(1), ISR1); } } Parrot_add_i_i { - Parrot_binop_x_x s/<op>/add/ + Parrot_binop_x_x s/<op>/add/ s/<R>/r/ +} + +Parrot_sub_i_i { + Parrot_binop_x_x s/<op>/sub/ s/<R>/r/ +} + +Parrot_add_n_n { + Parrot_binop_x_x s/<op>/add/ s/<R>/x/ s/ISR/FSR/ s/INT/NUM/ +} + +Parrot_sub_n_n { + Parrot_binop_x_x s/<op>/sub/ s/<R>/x/ s/ISR/FSR/ s/INT/NUM/ +} + +Parrot_mul_n_n { + Parrot_binop_x_x s/<op>/mul/ s/<R>/x/ s/ISR/FSR/ s/INT/NUM/ +} + +Parrot_div_n_n { + Parrot_binop_x_x s/<op>/div/ s/<R>/x/ s/ISR/FSR/ s/INT/NUM/ } -Parrot_sub { - Parrot_binop_x_x s/<op>/sub/ +Parrot_sqrt_n_n { + Parrot_binop_x_x s/<op>/sqrt/ s/<R>/x/ s/ISR/FSR/ s/INT/NUM/ } Modified: trunk/src/jit/amd64/jit_emit.h ============================================================================== --- trunk/src/jit/amd64/jit_emit.h (original) +++ trunk/src/jit/amd64/jit_emit.h Sun Aug 5 13:06:58 2007 @@ -73,10 +73,12 @@ #include <limits.h> void Parrot_jit_begin(Parrot_jit_info_t *, Interp *); +static const char* div_by_zero = "Divide by zero"; + /* This is used for testing whether or not keeping these two in registers is an * improvement or not. This file may need to be expanded further to know for - * sure. */ + * sure. But so far, there appears a 20 second improvement on my 2GHz. */ #undef USE_OP_MAP_AND_CODE_START @@ -85,24 +87,24 @@ * reserve some for special purposes */ typedef enum { - RAX, /* return values */ + RAX, RCX, RDX, - RBX, /* parrot base pointer */ - RSP, /* stack pointer */ - RBP, /* base pointer */ + RBX, + RSP, + RBP, RSI, RDI, R8, R9, R10, - R11, /* Scratch */ + R11, ISR1 = R11, R12, R13, #ifdef USE_OP_MAP_AND_CODE_START - CODE_START = R13, /* Don't forget, there are very weird pecularities */ + CODE_START = R13, #endif R14, #ifdef USE_OP_MAP_AND_CODE_START @@ -112,6 +114,25 @@ INTERP = R15 } amd64_iregister_t; +/* Register usage + * RAX return values + * RCX allocated, unpreserved + * RDX allocated, unpreserved + * RBX parrot register frame pointer + * RSP stack pointer + * RBP base pointer + * RSI allocated, unpreserved + * RDI allocated, unpreserved + + * R8 allocated, unpreserved + * R9 allocated, unpreserved + * R10 allocated, unpreserved + * R11 Scratch + * R12 allocated, preserved + * R13 allocated, preserved, or code_start + * R14 allocated, preserved, or op_map + * R15 interp + */ /* * If your arch doesn't have that much register available, you @@ -130,7 +151,9 @@ * define floating point register too, if there are some */ typedef enum { - XMM0, XMMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM0, + FSR1 = XMM0, + XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15 } amd64_fregister_t; @@ -162,9 +185,14 @@ #define b111 7 /* rex.[wrxb], incomplete but oh well */ -# define emit_REX(pc, dst, src) { \ +# define emit_rex64(pc, dst, src) { \ *(pc++) = (char)(0x48 | (((dst) & 8) >> 1) | (((src) & 8) >> 3)); } +# define emit_rex(pc, dst, src) { \ + if ((dst) & 8 || (src) & 8) { \ + *(pc++) = (char)(0x40 | (((dst) & 8) >> 1) | (((src) & 8) >> 3)); }} + +/* Use a 0x66 prefix for increased padding */ # define emit_nop(pc) { \ *(pc++) = (char)(0x90); } @@ -180,25 +208,25 @@ *(pc++) = (char)((scale << 6) | ((index & 7) << 3) | (base & 7)); } /* 0xXX +rq */ -# define emit_op_r(op, rexreq, pc, reg) { \ - if (rexreq) { \ - *(pc++) = (char)(0x48 | (((reg) & 8) >> 3)); \ - } \ - else if ((reg) & 8) { \ - *(pc++) = (char)(0x40 | (((reg) & 8) >> 3)); \ - } \ +# define emit_op_r(op, pc, reg) { \ + emit_rex64(pc, 0x0, reg); \ + *(pc++) = (char)((op) | ((reg) & 7)); \ + } + +# define emit_64op_r(op, pc, reg) { \ + emit_rex(pc, 0x0, reg); \ *(pc++) = (char)((op) | ((reg) & 7)); \ } /* 0xXX /r */ # define emit_op_r_r(op, pc, dst, src) { \ - emit_REX(pc, dst, src); \ + emit_rex64(pc, dst, src); \ *(pc++) = (char) op; \ emit_modrm(pc, b11, dst, src); \ } # define emit_op_r_mr(op, pc, dst, src, disp) { \ - emit_REX(pc, dst, src); \ + emit_rex64(pc, dst, src); \ *(pc++) = (char) op; \ if (is8bit(disp)) { \ emit_modrm(pc, b01, dst, src); \ @@ -219,7 +247,7 @@ # define emit_op_r_i(pc, op, op2, code, dst, imm) { \ - emit_REX(pc, dst, 0x0); \ + emit_rex64(pc, dst, 0x0); \ if (is8bit(imm)) { \ *(pc++) = (char) op; \ emit_modrm(pc, b11, code, dst); \ @@ -244,18 +272,20 @@ # define emit_sub_r_mr(pc, dst, src, disp) emit_op_r_mr(0x29, pc, dst, src, disp) # define emit_sub_mr_r(pc, dst, disp, src) emit_op_r_mr(0x2b, pc, src, dst, disp) + /* XXX emit_call_i is slightly broken I think, stick to a shared parrot */ -# define emit_call_i(pc, imm) emit_op_i(0xe8, (pc), (long)(imm) - (long)(pc) - 4) +# define emit_call_i(pc, imm) { emit_op_i(0xe8, (pc), ((long)(imm) - (long)(pc) - 4));\ + printf("Calling address %p\n", imm); } # define emit_call_r(pc, reg) { \ - emit_REX(pc, 0x0, reg); \ + emit_rex64(pc, 0x0, reg); \ *(pc)++ = (char)0xff; \ emit_modrm(pc, b11, 0x2, reg); } # define emit_jump_r_r(pc, reg1, reg2) { \ - emit_REX(jit_info->native_ptr, reg1, reg2); \ - *(jit_info->native_ptr++) = (char)0xff; \ - emit_modrm(jit_info->native_ptr, b00, 0x4, b100); \ - emit_sib(jit_info->native_ptr, b00, reg1, reg2); \ + emit_rex(pc, reg1, reg2); \ + *(pc++) = (char)0xff; \ + emit_modrm(pc, b00, 0x4, b100); \ + emit_sib(pc, b00, reg1, reg2); \ } # define emit_leave(pc) *(pc++) = (char)0xc9; @@ -267,7 +297,7 @@ /* mov [reg + offs], imm */ # define emit_mov_mr_i(pc, reg, offs, imm) {\ if (is32bit(imm)) { \ - emit_REX(pc, 0x0, reg); \ + emit_rex64(pc, 0x0, reg); \ *(pc++) = (char) 0xc7; \ if (is8bit(offs)) { \ emit_modrm(pc, b01, 0x0, reg); \ @@ -289,15 +319,15 @@ /* mov reg, imm */ # define emit_mov_r_i(pc, reg, imm) {\ - emit_op_r(0xb8, 1, pc, reg); \ + emit_op_r(0xb8, pc, reg); \ *(long *)pc = (long)(imm); \ pc += 8; \ } /* push reg */ -# define emit_push_r(pc, reg) emit_op_r(0x50, 0, pc, reg) +# define emit_push_r(pc, reg) emit_64op_r(0x50, pc, reg) /* pop reg */ -# define emit_pop_r(pc, reg) emit_op_r(0x58, 0, pc, reg) +# define emit_pop_r(pc, reg) emit_64op_r(0x58, pc, reg) /* push imm */ # define emit_push_i(pc, imm) emit_op_i(0x68, pc, imm) @@ -403,6 +433,123 @@ emit_ret(pc); \ } + +/************************************** + * Floating Point * + **************************************/ + +# define emit_op_x_x(prefix, op, pc, dst, src) { \ + *(pc++) = (char) prefix; \ + emit_rex(pc, dst, src); \ + *(pc++) = (char) 0x0f; \ + *(pc++) = (char) op; \ + emit_modrm(pc, b11, dst, src); \ +} + +# define emit_op_x_mx(prefix, op, pc, dst, src, offs) { \ + *(pc++) = (char) prefix; \ + emit_rex(pc, dst, src); \ + *(pc++) = (char) 0x0f; \ + *(pc++) = (char) op; \ + if (is8bit(offs)) { \ + emit_modrm(pc, b01, dst, src); \ + *(pc++) = (char)offs; \ + } \ + else { \ + emit_modrm(pc, b10, dst, src); \ + *(int *)pc = (int)offs; \ + pc += 4; \ + } \ +} + +# define emit_mov_x_x(pc, dst, src) emit_op_x_x(0x66, 0x28, pc, dst, src) + +# define emit_mov_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x10, pc, dst, src, offs) +# define emit_mov_mx_x(pc, dst, offs, src) emit_op_x_mx(0xf2, 0x11, pc, src, dst, offs) + +/* Intended to zero a register */ +# define emit_movhlps_x_x(pc, dst, src) { \ + emit_rex(pc, src, dst); \ + *(pc++) = (char) 0x0f; \ + *(pc++) = (char) 0x12; \ + emit_modrm(pc, b11, src, dst); \ +} + +# define emit_movd_r_x(pc, dst, src) { \ + *(pc++) = (char) 0x66; \ + emit_rex64(pc, dst, src); \ + *(pc++) = (char) 0x0f; \ + *(pc++) = (char) 0x6e; \ + emit_modrm(pc, b11, dst, src); \ +} + +# define emit_movd_x_r(pc, dst, src) { \ + *(pc++) = (char) 0x66; \ + emit_rex64(pc, src, dst); \ + *(pc++) = (char) 0x0f; \ + *(pc++) = (char) 0x7e; \ + emit_modrm(pc, b11, src, dst); \ +} + +# define emit_comisd_x_x(pc, dst, src) emit_op_x_x(0x66, 0x2f, pc, dst, src) +# define emit_comisd_x_mx(pc, dst, src, offs) emit_op_x_mx(0x66, 0x2f, pc, dst, src, offs) + +# define emit_add_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x58, pc, dst, src) +# define emit_add_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x58, pc, dst, src, offs) + +# define emit_sub_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x5c, pc, dst, src) +# define emit_sub_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x5c, pc, dst, src, offs) + +# define emit_mul_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x59, pc, dst, src) +# define emit_mul_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x59, pc, dst, src, offs) + +# define emit_div_check_zero_x(pc, reg) { \ + char *sav_ptr; \ + emit_movhlps_x_x(pc, FSR1, FSR1); \ + emit_comisd_x_x(pc, FSR1, reg); \ + emit_jcc(pc, jcc_jnz, 0x00); \ + sav_ptr = (void *)(pc - 1); \ + emit_mov_r_r(pc, RDI, INTERP); \ + emit_mov_r_i(pc, RSI, 0); \ + emit_mov_r_i(pc, RDX, E_ZeroDivisionError); \ + emit_mov_r_i(pc, RCX, div_by_zero); \ + /* This assumes that jit_info is defined, if it's not, the code's not "consistent" */ \ + call_func(jit_info, (void *) real_exception); \ + *sav_ptr = (char)(pc - sav_ptr - 1); \ +} + +# define emit_div_check_zero_mx(pc, reg, offs) { \ + char *sav_ptr; \ + emit_movhlps_x_x(pc, FSR1, FSR1); \ + emit_comisd_x_mx(pc, FSR1, reg, offs); \ + emit_jcc(pc, jcc_jnz, 0x00); \ + sav_ptr = (void *)(pc - 1); \ + emit_mov_r_r(pc, RDI, INTERP); \ + emit_mov_r_i(pc, RSI, 0); \ + emit_mov_r_i(pc, RDX, E_ZeroDivisionError); \ + emit_mov_r_i(pc, RCX, div_by_zero); \ + /* This assumes that jit_info is defined, if it's not, the code's not "consistent" */ \ + call_func(jit_info, (void *) real_exception); \ + *sav_ptr = (char)(pc - sav_ptr - 1); \ +} + +# define emit_div_x_x(pc, dst, src) { \ + emit_div_check_zero_x(pc, src); \ + emit_op_x_x(0xf2, 0x5e, pc, dst, src); \ +} +# define emit_div_x_mx(pc, dst, src, offs) { \ + emit_div_check_zero_mx(pc, src, offs); \ + emit_op_x_mx(0xf2, 0x5e, pc, dst, src, offs); \ +} + +# define emit_sqrt_x_x(pc, dst, src) emit_op_x_x(0xf2, 0x51, pc, dst, src) +# define emit_sqrt_x_mx(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x51, pc, dst, src, offs) + +# define emit_cvtsi2sd_x_mr(pc, dst, src, offs) emit_op_x_mx(0xf2, 0x2a, pc, dst, src, offs) +# define emit_cvtsi2sd_x_r(pc, dst, src) emit_op_x_x(0xf2, 0x2a, pc, dst, src) + +/*********************************************************/ + #ifdef JIT_EMIT # ifdef USE_OP_MAP_AND_CODE_START @@ -646,11 +793,11 @@ jit_mov_mr_n_offs(Interp *interp, Parrot_jit_info_t *jit_info, int base_reg, INTVAL offs, int src_reg) { - emit_64nop(jit_info->native_ptr); + emit_mov_mx_x(jit_info->native_ptr, base_reg, offs, src_reg); } static void -jit_mov_mr_offs(Interp *interp, Parrot_jit_info_t *jit_info, +jit_mov_mr_r(Interp *interp, Parrot_jit_info_t *jit_info, int base_reg, INTVAL offs, int src_reg) { emit_mov_mr_r(jit_info->native_ptr, base_reg, offs, src_reg); @@ -661,11 +808,11 @@ jit_mov_rm_n_offs(Interp *interp, Parrot_jit_info_t *jit_info, int dst_reg, int base_reg, INTVAL offs) { - emit_64nop(jit_info->native_ptr); + emit_mov_x_mx(jit_info->native_ptr, dst_reg, base_reg, offs); } static void -jit_mov_rm_offs(Interp *interp, Parrot_jit_info_t *jit_info, +jit_mov_r_mr(Interp *interp, Parrot_jit_info_t *jit_info, int dst_reg, int base_reg, INTVAL offs) { emit_mov_r_mr(jit_info->native_ptr, dst_reg, base_reg, offs); @@ -676,8 +823,8 @@ * jit core */ -# define INT_REGISTERS_TO_MAP 16 -# define FLOAT_REGISTERS_TO_MAP 16 +# define INT_REGISTERS_TO_MAP 10 +# define FLOAT_REGISTERS_TO_MAP 15 /* * enumerate these mapped registers @@ -706,12 +853,12 @@ R13, R14, # endif /* Unpreserved */ - RSI, RDI, R8, R9, R10, RCX, RDX + RCX, RDX, RSI, RDI, R8, R9, R10 }; static const char floatval_map[FLOAT_REGISTERS_TO_MAP] = { - XMM0, XMMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15 }; @@ -722,10 +869,10 @@ static const jit_arch_info arch_info = { /* CPU <- Parrot reg move functions */ - jit_mov_rm_offs, + jit_mov_r_mr, jit_mov_rm_n_offs, /* Parrot <- CPU reg move functions */ - jit_mov_mr_offs, + jit_mov_mr_r, jit_mov_mr_n_offs, Parrot_jit_dofixup, (jit_arch_f)0, @@ -741,7 +888,7 @@ 3, /* preserved int */ # endif intval_map, /* which ints mapped */ - 0, /* mapped float */ + 15, /* mapped float */ 0, /* preserved float */ floatval_map /* which floats mapped */ },