On Thursday, October 17, 2019, Stefan Brankovic <stefan.branko...@rt-rk.com> wrote:
> 'trans_vupkpx' function implements both vupkhpx and vupklpx instructions > with > argument 'high' determine which instruction is processed. Instructions are > implemented in two 'for' loops. Outer 'for' loop repeats unpacking two > times, > since both doubleword elements of destination register are formed the same > way. > It also stores result of every iteration in temporary register, that is > later > transferred to destination register. Inner 'for' loop does unpacking of > pixels > and forms resulting doubleword 32 by 32 bits. > > Signed-off-by: Stefan Brankovic <stefan.branko...@rt-rk.com> > --- > target/ppc/helper.h | 2 - > target/ppc/int_helper.c | 20 -------- > target/ppc/translate/vmx-impl.inc.c | 91 ++++++++++++++++++++++++++++++ > ++++++- > 3 files changed, 89 insertions(+), 24 deletions(-) > > diff --git a/target/ppc/helper.h b/target/ppc/helper.h > index b489b38..fd06b56 100644 > --- a/target/ppc/helper.h > +++ b/target/ppc/helper.h > @@ -233,8 +233,6 @@ DEF_HELPER_2(vextsh2d, void, avr, avr) > DEF_HELPER_2(vextsw2d, void, avr, avr) > DEF_HELPER_2(vnegw, void, avr, avr) > DEF_HELPER_2(vnegd, void, avr, avr) > -DEF_HELPER_2(vupkhpx, void, avr, avr) > -DEF_HELPER_2(vupklpx, void, avr, avr) > DEF_HELPER_2(vupkhsb, void, avr, avr) > DEF_HELPER_2(vupkhsh, void, avr, avr) > DEF_HELPER_2(vupkhsw, void, avr, avr) > diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c > index f910c11..9ee667d 100644 > --- a/target/ppc/int_helper.c > +++ b/target/ppc/int_helper.c > @@ -1737,26 +1737,6 @@ void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t > *r, ppc_avr_t *a, ppc_avr_t *b) > #define UPKHI 0 > #define UPKLO 1 > #endif > -#define VUPKPX(suffix, hi) \ > - void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ > - { \ > - int i; \ > - ppc_avr_t result; \ > - \ > - for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ > - uint16_t e = b->u16[hi ? i : i + 4]; \ > - uint8_t a = (e >> 15) ? 
0xff : 0; \ > - uint8_t r = (e >> 10) & 0x1f; \ > - uint8_t g = (e >> 5) & 0x1f; \ > - uint8_t b = e & 0x1f; \ > - \ > - result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ > - } \ > - *r = result; \ > - } > -VUPKPX(lpx, UPKLO) > -VUPKPX(hpx, UPKHI) > -#undef VUPKPX > > #define VUPK(suffix, unpacked, packee, hi) \ > void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ > diff --git a/target/ppc/translate/vmx-impl.inc.c > b/target/ppc/translate/vmx-impl.inc.c > index 3550ffa..09d80d6 100644 > --- a/target/ppc/translate/vmx-impl.inc.c > +++ b/target/ppc/translate/vmx-impl.inc.c > @@ -1031,6 +1031,95 @@ static void trans_vclzd(DisasContext *ctx) > tcg_temp_free_i64(avr); > } > > +/* > + * vupkhpx VRT,VRB - Vector Unpack High Pixel > + * vupklpx VRT,VRB - Vector Unpack Low Pixel > + * > + * Unpacks 4 pixels coded in 1-5-5-5 pattern from high/low doubleword > element > + * of source register into contigous array of bits in the destination > register. > + * Argument 'high' determines if high or low doubleword element of source > + * register is processed. 
> + */ > +static void trans_vupkpx(DisasContext *ctx, int high) > +{ > + int VT = rD(ctx->opcode); > + int VB = rB(ctx->opcode); > + TCGv_i64 tmp = tcg_temp_new_i64(); > + TCGv_i64 avr = tcg_temp_new_i64(); > + TCGv_i64 result = tcg_temp_new_i64(); > + TCGv_i64 result1 = tcg_temp_new_i64(); > + TCGv_i64 result2 = tcg_temp_new_i64(); > + int64_t mask1 = 0x1fULL; > + int64_t mask2 = 0x1fULL << 8; > + int64_t mask3 = 0x1fULL << 16; > + int64_t mask4 = 0xffULL << 56; > + int i, j; > + > + if (high == 1) { > + get_avr64(avr, VB, true); > + } else { > + get_avr64(avr, VB, false); > + } > + > + tcg_gen_movi_i64(result, 0x0ULL); > + for (i = 0; i < 2; i++) { > + for (j = 0; j < 2; j++) { > + tcg_gen_shli_i64(tmp, avr, (j * 16)); > + tcg_gen_andi_i64(tmp, tmp, mask1 << (j * 32)); > + tcg_gen_or_i64(result, result, tmp); > + > + tcg_gen_shli_i64(tmp, avr, 3 + (j * 16)); > + tcg_gen_andi_i64(tmp, tmp, mask2 << (j * 32)); > + tcg_gen_or_i64(result, result, tmp); > + > + tcg_gen_shli_i64(tmp, avr, 6 + (j * 16)); > + tcg_gen_andi_i64(tmp, tmp, mask3 << (j * 32)); > + tcg_gen_or_i64(result, result, tmp); > + > + tcg_gen_shri_i64(tmp, avr, (j * 16)); > + tcg_gen_ext16s_i64(tmp, tmp); > + tcg_gen_andi_i64(tmp, tmp, mask4); > + tcg_gen_shri_i64(tmp, tmp, (32 * (1 - j))); > + tcg_gen_or_i64(result, result, tmp); > + } > + if (i == 0) { > + tcg_gen_mov_i64(result1, result); > + tcg_gen_movi_i64(result, 0x0ULL); > + tcg_gen_shri_i64(avr, avr, 32); > + } > + if (i == 1) { > + tcg_gen_mov_i64(result2, result); > + } > + } > + > + set_avr64(VT, result1, false); > + set_avr64(VT, result2, true); > + > + tcg_temp_free_i64(tmp); > + tcg_temp_free_i64(avr); > + tcg_temp_free_i64(result); > + tcg_temp_free_i64(result1); > + tcg_temp_free_i64(result2); > +} > + > +static void gen_vupkhpx(DisasContext *ctx) > +{ > + if (unlikely(!ctx->altivec_enabled)) { > + gen_exception(ctx, POWERPC_EXCP_VPU); > + return; > + } > + trans_vupkpx(ctx, 1); > +} > + > +static void gen_vupklpx(DisasContext *ctx) > 
+{ > + if (unlikely(!ctx->altivec_enabled)) { > + gen_exception(ctx, POWERPC_EXCP_VPU); > + return; > + } > + trans_vupkpx(ctx, 0); > +} > + > GEN_VXFORM(vmuloub, 4, 0); > GEN_VXFORM(vmulouh, 4, 1); > GEN_VXFORM(vmulouw, 4, 2); > @@ -1348,8 +1437,6 @@ GEN_VXFORM_NOA(vupkhsw, 7, 25); > GEN_VXFORM_NOA(vupklsb, 7, 10); > GEN_VXFORM_NOA(vupklsh, 7, 11); > GEN_VXFORM_NOA(vupklsw, 7, 27); > -GEN_VXFORM_NOA(vupkhpx, 7, 13); > -GEN_VXFORM_NOA(vupklpx, 7, 15); There is an inconsistency here compared to your previous patches. There should be lines: GEN_VXFORM_TRANS(vupkhpx, 7, 13); GEN_VXFORM_TRANS(vupklpx, 7, 15); and there should be two new functions trans_vupkhpx() and trans_vupklpx() defined as thin wrappers around trans_vupkpx(). gen_vupkhpx() and gen_vupklpx() should be deleted. > GEN_VXFORM_NOA_ENV(vrefp, 5, 4); > GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5); > GEN_VXFORM_NOA_ENV(vexptefp, 5, 6); > -- > 2.7.4 > > >