This saves a round trip through an integer register and back to memory. Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- tcg/ppc/tcg-target.h | 2 +- tcg/ppc/tcg-target.inc.c | 57 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 5143ee853a..8ba5668fae 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -152,7 +152,7 @@ extern bool have_isa_3_00; #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 1 #define TCG_TARGET_HAS_minmax_vec 1 -#define TCG_TARGET_HAS_dupm_vec 0 +#define TCG_TARGET_HAS_dupm_vec 1 void flush_icache_range(uintptr_t start, uintptr_t stop); void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index 61a245b828..85e332fcd3 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -467,6 +467,8 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, #define NOP ORI /* ori 0,0,0 */ #define LVX XO31(103) +#define LVEBX XO31(7) +#define LVEHX XO31(39) #define LVEWX XO31(71) #define STVX XO31(231) @@ -2835,6 +2837,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_xor_vec: case INDEX_op_andc_vec: case INDEX_op_not_vec: + case INDEX_op_dupm_vec: return 1; case INDEX_op_add_vec: case INDEX_op_sub_vec: @@ -2854,6 +2857,55 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) } } +static void tcg_out_dupm_vec(TCGContext *s, unsigned vece, TCGReg out, + TCGReg base, intptr_t offset) +{ + int elt; + + out &= 31; + switch (vece) { + case MO_8: + tcg_out_mem_long(s, 0, LVEBX, out, base, offset); + elt = extract32(offset, 0, 4); +#ifndef HOST_WORDS_BIGENDIAN + elt ^= 15; +#endif + tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); + break; + case MO_16: + assert((offset & 1) == 0); + tcg_out_mem_long(s, 0, LVEHX, out, base, offset); + elt = extract32(offset, 1, 3); +#ifndef HOST_WORDS_BIGENDIAN + elt ^= 7; +#endif + tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); + break; + case MO_32: + assert((offset & 3) == 0); + tcg_out_mem_long(s, 0, LVEWX, out, base, offset); + elt = extract32(offset, 2, 2); +#ifndef HOST_WORDS_BIGENDIAN + elt ^= 3; +#endif + tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); + break; + case MO_64: + assert((offset & 7) == 0); + tcg_out_mem_long(s, 0, LVX, out, base, offset); + /* FIXME: 32-bit altivec */ + tcg_out_dupi_vec(s, TCG_TYPE_V128, TCG_VEC_TMP1, + offset & 8 + ? 0x08090a0b0c0d0e0full + : 0x0001020304050607ull); + tcg_out32(s, VPERM | VRT(out) | VRA(out) | VRB(out) + | VRC(TCG_VEC_TMP1)); + break; + default: + g_assert_not_reached(); + } +} + static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, const TCGArg *args, const int *const_args) @@ -2884,7 +2936,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_st_vec: tcg_out_st(s, type, a0, a1, a2); return; - + case INDEX_op_dupm_vec: + tcg_out_dupm_vec(s, vece, a0, a1, a2); + return; case INDEX_op_add_vec: insn = add_op[vece]; break; @@ -3251,6 +3305,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) return &v_v; case INDEX_op_ld_vec: case INDEX_op_st_vec: + case INDEX_op_dupm_vec: return &v_r; default: -- 2.17.2