Hi Peter, just two nits. I'm answering from the tablet, so apologies if this arrives with strange formatting — hope not.
On Thursday, January 23, 2014, Peter Maydell <peter.mayd...@linaro.org<javascript:_e({}, 'cvml', 'peter.mayd...@linaro.org');>> wrote: > From: Alex Bennée <alex.ben...@linaro.org> > > This implements a subset of the AdvSIMD shift operations (namely all the > none saturating or narrowing ones). The actual shift generation code > itself is common for both the scalar and vector cases but wrapped with > either vector element iteration or the fp reg access. > > The rounding operations need to take special care to correctly reflect > the result of adding rounding bits on high bits as the intermediates do > not truncate. > > Signed-off-by: Alex Bennée <alex.ben...@linaro.org> > Reviewed-by: Richard Henderson <r...@twiddle.net> > --- > target-arm/translate-a64.c | 381 > ++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 379 insertions(+), 2 deletions(-) > > diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c > index 5eabf24..9eb91fc4 100644 > --- a/target-arm/translate-a64.c > +++ b/target-arm/translate-a64.c > @@ -5531,15 +5531,220 @@ static void > disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) > unsupported_encoding(s, insn); > } > > +/* > + * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) > + * > + * This code is handles the common shifting code and is used by both > + * the vector and scalar code. 
> + */ > +static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, > + TCGv_i64 tcg_rnd, bool accumulate, > + bool is_u, int size, int shift) > +{ > + bool extended_result = false; > + bool round = !TCGV_IS_UNUSED_I64(tcg_rnd); > + int ext_lshift = 0; > + TCGv_i64 tcg_src_hi; > + > + if (round && size == 3) { > + extended_result = true; > + ext_lshift = 64 - shift; > + tcg_src_hi = tcg_temp_new_i64(); > + } else if (shift == 64) { > + if (!accumulate && is_u) { > + /* result is zero */ > + tcg_gen_movi_i64(tcg_res, 0); > + return; > + } > + } > + > + /* Deal with the rounding step */ > + if (round) { > + if (extended_result) { > + TCGv_i64 tcg_zero = tcg_const_i64(0); > + if (!is_u) { > + /* take care of sign extending tcg_res */ > + tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); > + tcg_gen_add2_i64(tcg_src, tcg_src_hi, > + tcg_src, tcg_src_hi, > + tcg_rnd, tcg_zero); > + } else { > + tcg_gen_add2_i64(tcg_src, tcg_src_hi, > + tcg_src, tcg_zero, > + tcg_rnd, tcg_zero); > + } > + tcg_temp_free_i64(tcg_zero); > + } else { > + tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); > + } > + } > + > + /* Now do the shift right */ > + if (round && extended_result) { > + /* extended case, >64 bit precision required */ > + if (ext_lshift == 0) { > + /* special case, only high bits matter */ > + tcg_gen_mov_i64(tcg_src, tcg_src_hi); > + } else { > + tcg_gen_shri_i64(tcg_src, tcg_src, shift); > + tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); > + tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); > + } > + } else { > + if (is_u) { > + if (shift == 64) { > + /* essentially shifting in 64 zeros */ > + tcg_gen_movi_i64(tcg_src, 0); > + } else { > + tcg_gen_shri_i64(tcg_src, tcg_src, shift); > + } > + } else { > + if (shift == 64) { > + /* effectively extending the sign-bit */ > + tcg_gen_sari_i64(tcg_src, tcg_src, 63); > + } else { > + tcg_gen_sari_i64(tcg_src, tcg_src, shift); > + } > + } > + } > + > + if (accumulate) { > + tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); > + } 
else { > + tcg_gen_mov_i64(tcg_res, tcg_src); > + } > + > + if (extended_result) { > + tcg_temp_free(tcg_src_hi); should this be tcg_temp_free_i64 ? > + } > +} > + > +/* Common SHL/SLI - Shift left with an optional insert */ > +static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src, > + bool insert, int shift) > +{ > + if (insert) { /* SLI */ > + tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift); > + } else { /* SHL */ > + tcg_gen_shli_i64(tcg_res, tcg_src, shift); > + } > +} > + > +/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) > */ > +static void handle_scalar_simd_shri(DisasContext *s, > + bool is_u, int immh, int immb, > + int opcode, int rn, int rd) > +{ > + const int size = 3; > + int immhb = immh << 3 | immb; > + int shift = 2 * (8 << size) - immhb; > + bool accumulate = false; > + bool round = false; > + TCGv_i64 tcg_rn; > + TCGv_i64 tcg_rd; > + TCGv_i64 tcg_round; > + > + if (!extract32(immh, 3, 1)) { > + unallocated_encoding(s); > + return; > + } > + > + switch (opcode) { > + case 0x02: /* SSRA / USRA (accumulate) */ > + accumulate = true; > + break; > + case 0x04: /* SRSHR / URSHR (rounding) */ > + round = true; > + break; > + case 0x06: /* SRSRA / URSRA (accum + rounding) */ > + accumulate = round = true; > + break; > + } > + > + if (round) { > + uint64_t round_const = 1ULL << (shift - 1); > + tcg_round = tcg_const_i64(round_const); > + } else { > + TCGV_UNUSED_I64(tcg_round); > + } > + > + tcg_rn = read_fp_dreg(s, rn); > + tcg_rd = accumulate ? 
read_fp_dreg(s, rd) : tcg_temp_new_i64(); > + > + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, > + accumulate, is_u, size, shift); > + > + write_fp_dreg(s, rd, tcg_rd); > + > + tcg_temp_free_i64(tcg_rn); > + tcg_temp_free_i64(tcg_rd); > + if (round) { > + tcg_temp_free_i64(tcg_round); > + } > +} > + > +/* SHL/SLI - Scalar shift left */ > +static void handle_scalar_simd_shli(DisasContext *s, bool insert, > + int immh, int immb, int opcode, > + int rn, int rd) > +{ > + int size = 32 - clz32(immh) - 1; > + int immhb = immh << 3 | immb; > + int shift = immhb - (8 << size); > + TCGv_i64 tcg_rn = new_tmp_a64(s); > + TCGv_i64 tcg_rd = new_tmp_a64(s); > + > + if (!extract32(immh, 3, 1)) { > + unallocated_encoding(s); > + return; > + } > + > + tcg_rn = read_fp_dreg(s, rn); > + tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); > + > + handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift); > + > + write_fp_dreg(s, rd, tcg_rd); > + > + tcg_temp_free_i64(tcg_rn); > + tcg_temp_free_i64(tcg_rd); > + > + return; no harm but maybe remove return? 
> +} > + > /* C3.6.9 AdvSIMD scalar shift by immediate > * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 > * +-----+---+-------------+------+------+--------+---+------+------+ > * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | > * +-----+---+-------------+------+------+--------+---+------+------+ > + * > + * This is the scalar version so it works on a fixed sized registers > */ > static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) > { > - unsupported_encoding(s, insn); > + int rd = extract32(insn, 0, 5); > + int rn = extract32(insn, 5, 5); > + int opcode = extract32(insn, 11, 5); > + int immb = extract32(insn, 16, 3); > + int immh = extract32(insn, 19, 4); > + bool is_u = extract32(insn, 29, 1); > + > + switch (opcode) { > + case 0x00: /* SSHR / USHR */ > + case 0x02: /* SSRA / USRA */ > + case 0x04: /* SRSHR / URSHR */ > + case 0x06: /* SRSRA / URSRA */ > + handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); > + break; > + case 0x0a: /* SHL / SLI */ > + handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); > + break; > + default: > + unsupported_encoding(s, insn); > + break; > + } > + > + return; also here > } > > /* C3.6.10 AdvSIMD scalar three different > @@ -5845,6 +6050,150 @@ static void disas_simd_scalar_indexed(DisasContext > *s, uint32_t insn) > unsupported_encoding(s, insn); > } > > +/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) > */ > +static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, > + int immh, int immb, int opcode, int rn, > int rd) > +{ > + int size = 32 - clz32(immh) - 1; > + int immhb = immh << 3 | immb; > + int shift = 2 * (8 << size) - immhb; > + bool accumulate = false; > + bool round = false; > + int dsize = is_q ? 128 : 64; > + int esize = 8 << size; > + int elements = dsize/esize; > + TCGMemOp memop = size | (is_u ? 
0 : MO_SIGN); > + TCGv_i64 tcg_rn = new_tmp_a64(s); > + TCGv_i64 tcg_rd = new_tmp_a64(s); > + TCGv_i64 tcg_round; > + int i; > + > + if (extract32(immh, 3, 1) && !is_q) { > + unallocated_encoding(s); > + return; > + } > + > + if (size > 3 && !is_q) { > + unallocated_encoding(s); > + return; > + } > + > + switch (opcode) { > + case 0x02: /* SSRA / USRA (accumulate) */ > + accumulate = true; > + break; > + case 0x04: /* SRSHR / URSHR (rounding) */ > + round = true; > + break; > + case 0x06: /* SRSRA / URSRA (accum + rounding) */ > + accumulate = round = true; > + break; > + } > + > + if (round) { > + uint64_t round_const = 1ULL << (shift - 1); > + tcg_round = tcg_const_i64(round_const); > + } else { > + TCGV_UNUSED_I64(tcg_round); > + } > + > + for (i = 0; i < elements; i++) { > + read_vec_element(s, tcg_rn, rn, i, memop); > + if (accumulate) { > + read_vec_element(s, tcg_rd, rd, i, memop); > + } > + > + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, > + accumulate, is_u, size, shift); > + > + write_vec_element(s, tcg_rd, rd, i, size); > + } > + > + if (!is_q) { > + clear_vec_high(s, rd); > + } > + > + if (round) { > + tcg_temp_free_i64(tcg_round); > + } > +} > + > +/* SHL/SLI - Vector shift left */ > +static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, > + int immh, int immb, int opcode, int rn, > int rd) > +{ > + int size = 32 - clz32(immh) - 1; > + int immhb = immh << 3 | immb; > + int shift = immhb - (8 << size); > + int dsize = is_q ? 
128 : 64; > + int esize = 8 << size; > + int elements = dsize/esize; > + TCGv_i64 tcg_rn = new_tmp_a64(s); > + TCGv_i64 tcg_rd = new_tmp_a64(s); > + int i; > + > + if (extract32(immh, 3, 1) && !is_q) { > + unallocated_encoding(s); > + return; > + } > + > + if (size > 3 && !is_q) { > + unallocated_encoding(s); > + return; > + } > + > + for (i = 0; i < elements; i++) { > + read_vec_element(s, tcg_rn, rn, i, size); > + if (insert) { > + read_vec_element(s, tcg_rd, rd, i, size); > + } > + > + handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift); > + > + write_vec_element(s, tcg_rd, rd, i, size); > + } > + > + if (!is_q) { > + clear_vec_high(s, rd); > + } > + > + return; also here. Ciao Claudio > +} > + > +/* USHLL/SHLL - Vector shift left with widening */ > +static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, > + int immh, int immb, int opcode, int rn, > int rd) > +{ > + int size = 32 - clz32(immh) - 1; > + int immhb = immh << 3 | immb; > + int shift = immhb - (8 << size); > + int dsize = 64; > + int esize = 8 << size; > + int elements = dsize/esize; > + TCGv_i64 tcg_rn = new_tmp_a64(s); > + TCGv_i64 tcg_rd = new_tmp_a64(s); > + int i; > + > + if (size >= 3) { > + unallocated_encoding(s); > + return; > + } > + > + /* For the LL variants the store is larger than the load, > + * so if rd == rn we would overwrite parts of our input. > + * So load everything right now and use shifts in the main loop. > + */ > + read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); > + > + for (i = 0; i < elements; i++) { > + tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); > + ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); > + tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); > + write_vec_element(s, tcg_rd, rd, i, size + 1); > + } > +} > + > + > /* C3.6.14 -- > 1.8.5 > >