Firstly, thank you very much for your response quickly! On 5/12/15 00:55, Richard Henderson wrote: > On 05/10/2015 03:45 PM, Chen Gang wrote: >> +static void gen_cmpltsi(struct DisasContext *dc, >> + uint8_t rdst, uint8_t rsrc, int8_t imm8) >> +{ >> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpltsi r%d, r%d, %d\n", >> + rdst, rsrc, imm8); >> + tcg_gen_setcondi_i64(TCG_COND_LTU, dest_gr(dc, rdst), load_gr(dc, rsrc), >> + (int64_t)imm8); >> +} >> + >> +static void gen_cmpltui(struct DisasContext *dc, >> + uint8_t rdst, uint8_t rsrc, uint8_t imm8) >> +{ >> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpltui r%d, r%d, %d\n", >> + rdst, rsrc, imm8); >> + tcg_gen_setcondi_i64(TCG_COND_LTU, >> + dest_gr(dc, rdst), load_gr(dc, rsrc), imm8); >> +} >> + >> +static void gen_cmpeqi(struct DisasContext *dc, >> + uint8_t rdst, uint8_t rsrc, uint8_t imm8) >> +{ >> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpeqi r%d, r%d, %d\n", rdst, rsrc, >> imm8); >> + tcg_gen_setcondi_i64(TCG_COND_EQ, >> + dest_gr(dc, rdst), load_gr(dc, rsrc), imm8); >> +} > > Merge these. >
OK, thanks. >> + >> +static void gen_cmp(struct DisasContext *dc, >> + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, TCGCond cond) >> +{ >> + const char *prefix; >> + >> + switch (cond) { >> + case TCG_COND_EQ: >> + prefix = "eq"; >> + break; >> + case TCG_COND_LE: >> + prefix = "les"; >> + break; >> + case TCG_COND_LEU: >> + prefix = "leu"; >> + break; >> + case TCG_COND_LT: >> + prefix = "lts"; >> + break; >> + case TCG_COND_LTU: >> + prefix = "ltu"; >> + break; >> + case TCG_COND_NE: >> + prefix = "ne"; >> + break; >> + default: >> + dc->exception = TILEGX_EXCP_OPCODE_UNKNOWN; >> + return; >> + } >> + >> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmp%s r%d, r%d, r%d\n", >> + prefix, rdst, rsrc, rsrcb); > > Better to just pass down the opcode name with the TCGCond rather than trying > to > recreate it. Then there's no need for a switch, nor a need for a confusing > TILEGX_EXCP_OPCODE_UNKNOWN path. > >> +static void gen_exch(struct DisasContext *dc, >> + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int excp) >> +{ >> + const char *prefix, *width; >> + >> + switch (excp) { >> + case TILEGX_EXCP_OPCODE_EXCH4: >> + prefix = ""; >> + width = "4"; >> + break; >> + case TILEGX_EXCP_OPCODE_EXCH: >> + prefix = ""; >> + width = ""; >> + break; >> + case TILEGX_EXCP_OPCODE_CMPEXCH4: >> + prefix = "cmp"; >> + width = "4"; >> + break; >> + case TILEGX_EXCP_OPCODE_CMPEXCH: >> + prefix = "cmp"; >> + width = ""; >> + break; >> + default: >> + dc->exception = TILEGX_EXCP_OPCODE_UNKNOWN; >> + return; >> + } > > Likewise. > OK, thanks. >> +static void gen_v1cmpeqi(struct DisasContext *dc, >> + uint8_t rdst, uint8_t rsrc, uint8_t imm8) >> +{ >> + int count; >> + TCGv vdst = dest_gr(dc, rdst); >> + TCGv tmp = tcg_temp_new_i64(); >> + >> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1cmpeqi r%d, r%d, %d\n", >> + rdst, rsrc, imm8); >> + >> + tcg_gen_movi_i64(vdst, 0); >> + >> + for (count = 0; count < 8; count++) { >> + tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), (8 - count - 1) * 8); >> + tcg_gen_andi_i64(tmp, tmp, 0xff); >> + tcg_gen_setcondi_i64(TCG_COND_EQ, tmp, tmp, imm8); >> + tcg_gen_or_i64(vdst, vdst, tmp); >> + tcg_gen_shli_i64(vdst, vdst, 8); > > For all of these vector instructions, I would encourage you to use helpers to > extract and insert values. Extraction has little choice but to use a shift > and > a mask, as you use here. But insertion can use tcg_gen_deposit_i64. I think > that is a lot easier to reason with than your construction here which > sequentially shifts vdst. > > E.g. > > static inline void > extract_v1(TCGv out, TCGv in, unsigned byte) > { > tcg_gen_shri_i64(out, in, byte * 8); > tcg_gen_ext8u_i64(out, out); > } > > static inline void > insert_v1(TCGv out, TCGv in, unsigned byte) > { > tcg_gen_deposit_i64(out, out, in, byte * 8, 8); > } > > > This loop then becomes > > TCGv vsrc = load_gr(dc, src); > for (count = 0; count < 8; ++count) { > extract_v1(tmp, vsrc, count); > tcg_gen_setcondi_i64(TCG_COND_EQ, tmp, tmp, imm8); > insert_v1(vdst, tmp, count); > } > >> +static void gen_v1int_l(struct DisasContext *dc, >> + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb) >> +{ >> + int count; >> + TCGv vdst = dest_gr(dc, rdst); >> + TCGv tmp = tcg_temp_new_i64(); >> + >> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1int_l r%d, r%d, r%d\n", >> + rdst, rsrc, rsrcb); >> + >> + tcg_gen_movi_i64(vdst, 0); >> + for (count = 0; count < 4; count++) { >> + >> + tcg_gen_shli_i64(vdst, vdst, 8); >> + >> + tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), (4 - count - 1) * 8); >> + tcg_gen_andi_i64(tmp, tmp, 0xff); >> + tcg_gen_or_i64(vdst, vdst, tmp); >> + tcg_gen_shli_i64(vdst, vdst, 8); >> + >> + tcg_gen_shri_i64(tmp, load_gr(dc, rsrcb), (4 - count - 1) * 8); >> + tcg_gen_andi_i64(tmp, tmp, 0xff); >> + tcg_gen_or_i64(vdst, vdst, tmp); >> + } >> + tcg_temp_free_i64(tmp); > > TCGv vsrc = load_gr(dc, rsrc); > TCGv vsrcb = load_gr(dc, rsrcb); > > for (count = 0; count < 4; ++count) { > extract_v1(tmp, vsrc, count); > insert_v1(vdst, tmp, count * 2); > extract_v1(tmp, vsrcb, count); > insert_v1(vdst, tmp, count * 2 + 1); > } > > OK, thanks. >> +} >> + >> +/* >> + * Functional Description >> + * >> + * uint64_t output = 0; >> + * uint32_t counter; >> + * for (counter = 0; counter < (WORD_SIZE / 32); counter++) >> + * { >> + * bool asel = ((counter & 1) == 1); >> + * int in_sel = 0 + counter / 2; >> + * int32_t srca = get4Byte (rf[SrcA], in_sel); >> + * int32_t srcb = get4Byte (rf[SrcB], in_sel); >> + * output = set4Byte (output, counter, (asel ? srca : srcb)); >> + * } >> + * rf[Dest] = output; >> +*/ >> + >> +static void gen_v4int_l(struct DisasContext *dc, >> + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb) >> +{ >> + TCGv vdst = dest_gr(dc, rdst); >> + TCGv tmp = tcg_temp_new_i64(); >> + >> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v4int_l r%d, r%d, r%d\n", >> + rdst, rsrc, rsrcb); >> + >> + tcg_gen_andi_i64(vdst, load_gr(dc, rsrc), 0xffffffff); >> + tcg_gen_shli_i64(vdst, vdst, 8); >> + tcg_gen_andi_i64(tmp, load_gr(dc, rsrcb), 0xffffffff); >> + tcg_gen_or_i64(vdst, vdst, tmp); > > And herein is a bug, that I'd hope using the helper functions would avoid: you > shift by 8 instead of 32. This function simplifies to > OK, thank you very much. > tcg_gen_deposit_i64(vdst, load_gr(dc, rsrc), load_gr(dc, rsrcb), > 32, 32); > OK, thanks. >> +/* >> + * uint64_t mask = 0; >> + * int64_t background = ((rf[SrcA] >> BFEnd) & 1) ? -1ULL : 0ULL; >> + * mask = ((-1ULL) ^ ((-1ULL << ((BFEnd - BFStart) & 63)) << 1)); >> + * uint64_t rot_src = (((uint64_t) rf[SrcA]) >> BFStart) >> + * | (rf[SrcA] << (64 - BFStart)); >> + * rf[Dest] = (rot_src & mask) | (background & ~mask); >> + */ >> +static void gen_bfexts(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc, >> + uint8_t start, uint8_t end) >> +{ >> + uint64_t mask = (-1ULL) ^ ((-1ULL << ((end - start) & 63)) << 1); >> + TCGv vldst = tcg_temp_local_new_i64(); >> + TCGv tmp = tcg_temp_local_new_i64(); >> + TCGv pmsk = tcg_const_i64(-1ULL); > > Why the local temps? > Excuse me, I am not quite sure whether tcg_gen_movcond_i64() belongs to the branch or not. >> + >> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "bfexts r%d, r%d, %d, %d\n", >> + rdst, rsrc, start, end); >> + >> + tcg_gen_rotri_i64(vldst, load_gr(dc, rsrc), start); >> + tcg_gen_andi_i64(vldst, vldst, mask); >> + >> + tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), end); >> + tcg_gen_andi_i64(tmp, tmp, 1); >> + tcg_gen_movcond_i64(TCG_COND_EQ, tmp, tmp, load_zero(dc), >> + load_zero(dc), pmsk); > > This movcond is equivalent to negation. > OK, thanks. >> +/* >> + Functional Description >> + uint64_t a = rf[SrcA]; >> + uint64_t b = rf[SrcB]; >> + uint64_t d = rf[Dest]; >> + uint64_t output = 0; >> + unsigned int counter; >> + for (counter = 0; counter < (WORD_SIZE / BYTE_SIZE); counter++) >> + { >> + int sel = getByte (b, counter) & 0xf; >> + uint8_t byte = (sel < 8) ? getByte (d, sel) : getByte (a, (sel - >> 8)); >> + output = setByte (output, counter, byte); >> + } >> + rf[Dest] = output; >> + */ >> +static void gen_shufflebytes(struct DisasContext *dc, >> + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb) > > I strongly suggest this be moved to op_helper.c. It's too big. > OK, thanks. >> +/* FIXME: At present, skip unalignment checking */ >> +static void gen_ld(struct DisasContext *dc, >> + uint8_t rdst, uint8_t rsrc, TCGMemOp ops) > > Alignment checks would never be done here anyway. OK, thanks. > Again, pass down the opcode string rather than rebuild. > OK, thanks. Thanks. -- Chen Gang Open, share, and attitude like air, water, and life which God blessed