This patch includes:
- VREPLVE[I].{B/H/W/D};
- VBSLL.V, VBSRL.V;
- VPACK{EV/OD}.{B/H/W/D};
- VPICK{EV/OD}.{B/H/W/D}.
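
As a quick reference (illustration only, not code from this patch: the
plain byte arrays and the model_* names below are made up, while the real
helpers operate on VReg elements and run LSX_LEN/BIT iterations), the
even-element pack and pick forms shuffle elements like this:

#include <stdint.h>
#include <string.h>

#define LSX_BYTES 16    /* one 128-bit LSX register, viewed as bytes */

/* VPACKEV.B: vd[2i+1] = vj[2i], vd[2i] = vk[2i] */
static void model_vpackev_b(uint8_t *vd, const uint8_t *vj, const uint8_t *vk)
{
    uint8_t tmp[LSX_BYTES];              /* mirrors the helper's VReg temp */

    for (int i = 0; i < LSX_BYTES / 2; i++) {
        tmp[2 * i + 1] = vj[2 * i];      /* odd slots: even elements of vj */
        tmp[2 * i] = vk[2 * i];          /* even slots: even elements of vk */
    }
    memcpy(vd, tmp, LSX_BYTES);
}

/* VPICKEV.B: vd[i + 8] = vj[2i], vd[i] = vk[2i] */
static void model_vpickev_b(uint8_t *vd, const uint8_t *vj, const uint8_t *vk)
{
    uint8_t tmp[LSX_BYTES];

    for (int i = 0; i < LSX_BYTES / 2; i++) {
        tmp[i + LSX_BYTES / 2] = vj[2 * i];  /* high half: even elements of vj */
        tmp[i] = vk[2 * i];                  /* low half: even elements of vk */
    }
    memcpy(vd, tmp, LSX_BYTES);
}

The odd-element variants (VPACKOD/VPICKOD) do the same with the 2*i+1
elements of vj and vk.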
Reviewed-by: Richard Henderson <richard.hender...@linaro.org>
Signed-off-by: Song Gao <gaos...@loongson.cn>
---
 target/loongarch/disas.c                    |  35 +++++
 target/loongarch/helper.h                   |  18 +++
 target/loongarch/insn_trans/trans_lsx.c.inc | 144 ++++++++++++++++++++
 target/loongarch/insns.decode               |  34 +++++
 target/loongarch/lsx_helper.c               |  88 ++++++++++++
 5 files changed, 319 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 7255a2aa4f..c6cf782725 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -833,6 +833,11 @@ static void output_vr(DisasContext *ctx, arg_vr *a, const char *mnemonic)
     output(ctx, mnemonic, "v%d, r%d", a->vd, a->rj);
 }
 
+static void output_vvr(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "v%d, v%d, r%d", a->vd, a->vj, a->rk);
+}
+
 INSN_LSX(vadd_b, vvv)
 INSN_LSX(vadd_h, vvv)
 INSN_LSX(vadd_w, vvv)
@@ -1594,3 +1599,33 @@ INSN_LSX(vreplgr2vr_b, vr)
 INSN_LSX(vreplgr2vr_h, vr)
 INSN_LSX(vreplgr2vr_w, vr)
 INSN_LSX(vreplgr2vr_d, vr)
+
+INSN_LSX(vreplve_b, vvr)
+INSN_LSX(vreplve_h, vvr)
+INSN_LSX(vreplve_w, vvr)
+INSN_LSX(vreplve_d, vvr)
+INSN_LSX(vreplvei_b, vv_i)
+INSN_LSX(vreplvei_h, vv_i)
+INSN_LSX(vreplvei_w, vv_i)
+INSN_LSX(vreplvei_d, vv_i)
+
+INSN_LSX(vbsll_v, vv_i)
+INSN_LSX(vbsrl_v, vv_i)
+
+INSN_LSX(vpackev_b, vvv)
+INSN_LSX(vpackev_h, vvv)
+INSN_LSX(vpackev_w, vvv)
+INSN_LSX(vpackev_d, vvv)
+INSN_LSX(vpackod_b, vvv)
+INSN_LSX(vpackod_h, vvv)
+INSN_LSX(vpackod_w, vvv)
+INSN_LSX(vpackod_d, vvv)
+
+INSN_LSX(vpickev_b, vvv)
+INSN_LSX(vpickev_h, vvv)
+INSN_LSX(vpickev_w, vvv)
+INSN_LSX(vpickev_d, vvv)
+INSN_LSX(vpickod_b, vvv)
+INSN_LSX(vpickod_h, vvv)
+INSN_LSX(vpickod_w, vvv)
+INSN_LSX(vpickod_d, vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 8eb2738cd0..51ad694be2 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -653,3 +653,21 @@ DEF_HELPER_3(vsetallnez_b, void, env, i32, i32)
 DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
 DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
 DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
+
+DEF_HELPER_4(vpackev_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackev_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackev_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackev_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackod_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackod_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackod_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackod_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vpickev_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickev_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickev_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickev_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index e722b79bea..1146ace1b7 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3933,3 +3933,147 @@ TRANS(vreplgr2vr_b, gvec_dup, MO_8)
 TRANS(vreplgr2vr_h, gvec_dup, MO_16)
 TRANS(vreplgr2vr_w, gvec_dup, MO_32)
 TRANS(vreplgr2vr_d, gvec_dup, MO_64)
+
+static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
+{
+    CHECK_SXE;
+    tcg_gen_gvec_dup_mem(MO_8, vec_full_offset(a->vd),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->vj].vreg.B((a->imm))),
+                         16, ctx->vl/8);
+    return true;
+}
+
+static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
+{
+    CHECK_SXE;
+    tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->vj].vreg.H((a->imm))),
+                         16, ctx->vl/8);
+    return true;
+}
+static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
+{
+    CHECK_SXE;
+    tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->vj].vreg.W((a->imm))),
+                         16, ctx->vl/8);
+    return true;
+}
+static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
+{
+    CHECK_SXE;
+    tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->vj].vreg.D((a->imm))),
+                         16, ctx->vl/8);
+    return true;
+}
+
+static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
+                        void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_ptr t1 = tcg_temp_new_ptr();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+
+    CHECK_SXE;
+
+    tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1);
+    tcg_gen_shli_i64(t0, t0, vece);
+    if (HOST_BIG_ENDIAN) {
+        tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN/bit) -1));
+    }
+
+    tcg_gen_trunc_i64_ptr(t1, t0);
+    tcg_gen_add_ptr(t1, t1, cpu_env);
+    func(t2, t1, vec_full_offset(a->vj));
+    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, t2);
+
+    return true;
+}
+
+TRANS(vreplve_b, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64)
+TRANS(vreplve_h, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
+TRANS(vreplve_w, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
+TRANS(vreplve_d, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
+
+static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a)
+{
+    int ofs;
+    TCGv_i64 desthigh, destlow, high, low;
+
+    CHECK_SXE;
+
+    desthigh = tcg_temp_new_i64();
+    destlow = tcg_temp_new_i64();
+    high = tcg_temp_new_i64();
+    low = tcg_temp_new_i64();
+
+    get_vreg64(low, a->vj, 0);
+
+    ofs = ((a->imm) & 0xf) * 8;
+    if (ofs < 64) {
+        get_vreg64(high, a->vj, 1);
+        tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
+        tcg_gen_shli_i64(destlow, low, ofs);
+    } else {
+        tcg_gen_shli_i64(desthigh, low, ofs - 64);
+        destlow = tcg_constant_i64(0);
+    }
+
+    set_vreg64(desthigh, a->vd, 1);
+    set_vreg64(destlow, a->vd, 0);
+
+    return true;
+}
+
+static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a)
+{
+    TCGv_i64 desthigh, destlow, high, low;
+    int ofs;
+
+    CHECK_SXE;
+
+    desthigh = tcg_temp_new_i64();
+    destlow = tcg_temp_new_i64();
+    high = tcg_temp_new_i64();
+    low = tcg_temp_new_i64();
+
+    get_vreg64(high, a->vj, 1);
+
+    ofs = ((a->imm) & 0xf) * 8;
+    if (ofs < 64) {
+        get_vreg64(low, a->vj, 0);
+        tcg_gen_extract2_i64(destlow, low, high, ofs);
+        tcg_gen_shri_i64(desthigh, high, ofs);
+    } else {
+        tcg_gen_shri_i64(destlow, high, ofs - 64);
+        desthigh = tcg_constant_i64(0);
+    }
+
+    set_vreg64(desthigh, a->vd, 1);
+    set_vreg64(destlow, a->vd, 0);
+
+    return true;
+}
+
+TRANS(vpackev_b, gen_vvv, gen_helper_vpackev_b)
+TRANS(vpackev_h, gen_vvv, gen_helper_vpackev_h)
+TRANS(vpackev_w, gen_vvv, gen_helper_vpackev_w)
+TRANS(vpackev_d, gen_vvv, gen_helper_vpackev_d)
+TRANS(vpackod_b, gen_vvv, gen_helper_vpackod_b)
+TRANS(vpackod_h, gen_vvv, gen_helper_vpackod_h)
+TRANS(vpackod_w, gen_vvv, gen_helper_vpackod_w)
+TRANS(vpackod_d, gen_vvv, gen_helper_vpackod_d)
+
+TRANS(vpickev_b, gen_vvv, gen_helper_vpickev_b)
+TRANS(vpickev_h, gen_vvv, gen_helper_vpickev_h)
+TRANS(vpickev_w, gen_vvv, gen_helper_vpickev_w)
+TRANS(vpickev_d, gen_vvv, gen_helper_vpickev_d)
+TRANS(vpickod_b, gen_vvv, gen_helper_vpickod_b)
+TRANS(vpickod_h, gen_vvv, gen_helper_vpickod_h)
+TRANS(vpickod_w, gen_vvv, gen_helper_vpickod_w)
+TRANS(vpickod_d, gen_vvv, gen_helper_vpickod_d)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index d1d255ab82..ab9e9e422f 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -499,6 +499,7 @@ dbcl 0000 00000010 10101 ............... @i15
 &vr_i vd rj imm
 &rv_i rd vj imm
 &vr vd rj
+&vvr vd vj rk
 
 #
 # LSX Formats
@@ -506,6 +507,8 @@ dbcl 0000 00000010 10101 ............... @i15
 @vv .... ........ ..... ..... vj:5 vd:5 &vv
 @cv .... ........ ..... ..... vj:5 .. cd:3 &cv
 @vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv
+@vv_ui1 .... ........ ..... .... imm:1 vj:5 vd:5 &vv_i
+@vv_ui2 .... ........ ..... ... imm:2 vj:5 vd:5 &vv_i
 @vv_ui3 .... ........ ..... .. imm:3 vj:5 vd:5 &vv_i
 @vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i
 @vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i
@@ -524,6 +527,7 @@ dbcl 0000 00000010 10101 ............... @i15
 @rv_ui2 .... ........ ..... ... imm:2 vj:5 rd:5 &rv_i
 @rv_ui1 .... ........ ..... .... imm:1 vj:5 rd:5 &rv_i
 @vr .... ........ ..... ..... rj:5 vd:5 &vr
+@vvr .... ........ ..... rk:5 vj:5 vd:5 &vvr
 
 vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
 vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
@@ -1197,3 +1201,33 @@ vreplgr2vr_b 0111 00101001 11110 00000 ..... ..... @vr
 vreplgr2vr_h 0111 00101001 11110 00001 ..... ..... @vr
 vreplgr2vr_w 0111 00101001 11110 00010 ..... ..... @vr
 vreplgr2vr_d 0111 00101001 11110 00011 ..... ..... @vr
+
+vreplve_b 0111 00010010 00100 ..... ..... ..... @vvr
+vreplve_h 0111 00010010 00101 ..... ..... ..... @vvr
+vreplve_w 0111 00010010 00110 ..... ..... ..... @vvr
+vreplve_d 0111 00010010 00111 ..... ..... ..... @vvr
+vreplvei_b 0111 00101111 01111 0 .... ..... ..... @vv_ui4
+vreplvei_h 0111 00101111 01111 10 ... ..... ..... @vv_ui3
+vreplvei_w 0111 00101111 01111 110 .. ..... ..... @vv_ui2
+vreplvei_d 0111 00101111 01111 1110 . ..... ..... @vv_ui1
+
+vbsll_v 0111 00101000 11100 ..... ..... ..... @vv_ui5
+vbsrl_v 0111 00101000 11101 ..... ..... ..... @vv_ui5
+
+vpackev_b 0111 00010001 01100 ..... ..... ..... @vvv
+vpackev_h 0111 00010001 01101 ..... ..... ..... @vvv
+vpackev_w 0111 00010001 01110 ..... ..... ..... @vvv
+vpackev_d 0111 00010001 01111 ..... ..... ..... @vvv
+vpackod_b 0111 00010001 10000 ..... ..... ..... @vvv
+vpackod_h 0111 00010001 10001 ..... ..... ..... @vvv
+vpackod_w 0111 00010001 10010 ..... ..... ..... @vvv
+vpackod_d 0111 00010001 10011 ..... ..... ..... @vvv
+
+vpickev_b 0111 00010001 11100 ..... ..... ..... @vvv
+vpickev_h 0111 00010001 11101 ..... ..... ..... @vvv
+vpickev_w 0111 00010001 11110 ..... ..... ..... @vvv
+vpickev_d 0111 00010001 11111 ..... ..... ..... @vvv
+vpickod_b 0111 00010010 00000 ..... ..... ..... @vvv
+vpickod_h 0111 00010010 00001 ..... ..... ..... @vvv
+vpickod_w 0111 00010010 00010 ..... ..... ..... @vvv
+vpickod_d 0111 00010010 00011 ..... ..... ..... @vvv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index 2fda1a72cb..d5e1a1231d 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -2766,3 +2766,91 @@ SETALLNEZ(vsetallnez_b, MO_8)
 SETALLNEZ(vsetallnez_h, MO_16)
 SETALLNEZ(vsetallnez_w, MO_32)
 SETALLNEZ(vsetallnez_d, MO_64)
+
+#define VPACKEV(NAME, BIT, E)                                \
+void HELPER(NAME)(CPULoongArchState *env,                    \
+                  uint32_t vd, uint32_t vj, uint32_t vk)     \
+{                                                            \
+    int i;                                                   \
+    VReg temp;                                               \
+    VReg *Vd = &(env->fpr[vd].vreg);                         \
+    VReg *Vj = &(env->fpr[vj].vreg);                         \
+    VReg *Vk = &(env->fpr[vk].vreg);                         \
+                                                             \
+    for (i = 0; i < LSX_LEN/BIT; i++) {                      \
+        temp.E(2 * i + 1) = Vj->E(2 * i);                    \
+        temp.E(2 *i) = Vk->E(2 * i);                         \
+    }                                                        \
+    *Vd = temp;                                              \
+}
+
+VPACKEV(vpackev_b, 16, B)
+VPACKEV(vpackev_h, 32, H)
+VPACKEV(vpackev_w, 64, W)
+VPACKEV(vpackev_d, 128, D)
+
+#define VPACKOD(NAME, BIT, E)                                \
+void HELPER(NAME)(CPULoongArchState *env,                    \
+                  uint32_t vd, uint32_t vj, uint32_t vk)     \
+{                                                            \
+    int i;                                                   \
+    VReg temp;                                               \
+    VReg *Vd = &(env->fpr[vd].vreg);                         \
+    VReg *Vj = &(env->fpr[vj].vreg);                         \
+    VReg *Vk = &(env->fpr[vk].vreg);                         \
+                                                             \
+    for (i = 0; i < LSX_LEN/BIT; i++) {                      \
+        temp.E(2 * i + 1) = Vj->E(2 * i + 1);                \
+        temp.E(2 * i) = Vk->E(2 * i + 1);                    \
+    }                                                        \
+    *Vd = temp;                                              \
+}
+
+VPACKOD(vpackod_b, 16, B)
+VPACKOD(vpackod_h, 32, H)
+VPACKOD(vpackod_w, 64, W)
+VPACKOD(vpackod_d, 128, D)
+
+#define VPICKEV(NAME, BIT, E)                                \
+void HELPER(NAME)(CPULoongArchState *env,                    \
+                  uint32_t vd, uint32_t vj, uint32_t vk)     \
+{                                                            \
+    int i;                                                   \
+    VReg temp;                                               \
+    VReg *Vd = &(env->fpr[vd].vreg);                         \
+    VReg *Vj = &(env->fpr[vj].vreg);                         \
+    VReg *Vk = &(env->fpr[vk].vreg);                         \
+                                                             \
+    for (i = 0; i < LSX_LEN/BIT; i++) {                      \
+        temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i);              \
+        temp.E(i) = Vk->E(2 * i);                            \
+    }                                                        \
+    *Vd = temp;                                              \
+}
+
+VPICKEV(vpickev_b, 16, B)
+VPICKEV(vpickev_h, 32, H)
+VPICKEV(vpickev_w, 64, W)
+VPICKEV(vpickev_d, 128, D)
+
+#define VPICKOD(NAME, BIT, E)                                \
+void HELPER(NAME)(CPULoongArchState *env,                    \
+                  uint32_t vd, uint32_t vj, uint32_t vk)     \
+{                                                            \
+    int i;                                                   \
+    VReg temp;                                               \
+    VReg *Vd = &(env->fpr[vd].vreg);                         \
+    VReg *Vj = &(env->fpr[vj].vreg);                         \
+    VReg *Vk = &(env->fpr[vk].vreg);                         \
+                                                             \
+    for (i = 0; i < LSX_LEN/BIT; i++) {                      \
+        temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1);          \
+        temp.E(i) = Vk->E(2 * i + 1);                        \
+    }                                                        \
+    *Vd = temp;                                              \
+}
+
+VPICKOD(vpickod_b, 16, B)
+VPICKOD(vpickod_h, 32, H)
+VPICKOD(vpickod_w, 64, W)
+VPICKOD(vpickod_d, 128, D)
-- 
2.31.1