Hello David, Thank you very much for your review. As you might have noticed this is my first patch so I hope you don't mind about my newbie questions/explanations below.
On Thu, Oct 27, 2016 at 12:05:30PM +1100, David Gibson wrote: > On Wed, Oct 26, 2016 at 11:18:55AM -0200, Jose Ricardo Ziviani wrote: > > bcdcfn. converts from National numeric format to BCD. National format > > uses a byte to represent a digit where the most significant nibble is > > always 0x3 and the least sign. nibbles is the digit itself. > > > > Signed-off-by: Jose Ricardo Ziviani <jos...@linux.vnet.ibm.com> > > --- > > target-ppc/helper.h | 1 + > > target-ppc/int_helper.c | 54 ++++++++++++++++++++++++++ > > target-ppc/translate/vmx-impl.inc.c | 75 > > +++++++++++++++++++++++++++++++++++++ > > target-ppc/translate/vmx-ops.inc.c | 4 +- > > 4 files changed, 132 insertions(+), 2 deletions(-) > > > > diff --git a/target-ppc/helper.h b/target-ppc/helper.h > > index 04c6421..d30ec60 100644 > > --- a/target-ppc/helper.h > > +++ b/target-ppc/helper.h > > @@ -369,6 +369,7 @@ DEF_HELPER_4(vpermxor, void, avr, avr, avr, avr) > > > > DEF_HELPER_4(bcdadd, i32, avr, avr, avr, i32) > > DEF_HELPER_4(bcdsub, i32, avr, avr, avr, i32) > > +DEF_HELPER_3(bcdcfn, i32, avr, avr, i32) > > > > DEF_HELPER_2(xsadddp, void, env, i32) > > DEF_HELPER_2(xssubdp, void, env, i32) > > diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c > > index 5aee0a8..494c74e 100644 > > --- a/target-ppc/int_helper.c > > +++ b/target-ppc/int_helper.c > > @@ -2417,6 +2417,8 @@ void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, > > ppc_avr_t *b, ppc_avr_t *c) > > #define BCD_NEG_PREF 0xD > > #define BCD_NEG_ALT 0xB > > #define BCD_PLUS_ALT_2 0xE > > +#define NATIONAL_PLUS 0x2B > > +#define NATIONAL_NEG 0x2D > > > > #if defined(HOST_WORDS_BIGENDIAN) > > #define BCD_DIG_BYTE(n) (15 - (n/2)) > > @@ -2483,6 +2485,15 @@ static void bcd_put_digit(ppc_avr_t *bcd, uint8_t > > digit, int n) > > } > > } > > > > +static uint8_t get_national_digit(ppc_avr_t *reg, int n) > > +{ > > +#if defined(HOST_WORDS_BIGENDIAN) > > + return reg->u16[8 - n] & 0xFF; > > +#else > > + return reg->u16[n] & 0xFF; > > +#endif > > You're discarding the high byte of each digit here, which means you > won't detect badly encoded values that have correct low bytes. OK! The high byte is expected to be 0, I'll check if it's != 0 and set a flag like: *invalid = (reg->u16[n] >> 8) != 0; makes sense? > > > +} > > + > > static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) > > { > > int i; > > @@ -2613,6 +2624,49 @@ uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, > > ppc_avr_t *b, uint32_t ps) > > return helper_bcdadd(r, a, &bcopy, ps); > > } > > > > +uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) > > +{ > > + int i; > > + int is_zero = 0; > > + int cr = 0; > > + int national = 0; > > + ppc_avr_t ret = { .u64 = { 0, 0 } }; > > + uint16_t sgnb = get_national_digit(b, 0); > > + int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); > > + > > + for (i = 1; i < 8; i++) { > > + national = get_national_digit(b, i); > > + > > + is_zero += (national == 0x30); > > + if (unlikely(national < 0x30 || national > 0x39)) { > > + invalid = 1; > > + } > > + > > + bcd_put_digit(&ret, national & 0xf, i); > > + } > > + > > + if (sgnb == NATIONAL_PLUS || > > + (b->u64[0] == 0 && b->u64[1] == 0)) { > > The second part of this test doesn't seem to make much sense. I think > you're trying to always put a positive sign on a zero result. But > you're testing the national encoded input, not the BCD encoded output, > and zero will *not* be all zero bits in national encoding. OK! ISA defines invalid encoding as undefined behavior. I forced a positive sign to vrt when vrb = 0 (invalid enc.) but it is not necessary. I'll remove it in v2. > > > + bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, > > 0); > > + } else { > > + bcd_put_digit(&ret, BCD_NEG_PREF, 0); > > + } > > + > > + if (!is_zero) { > > From the logic above, 'is_zero' is currently a count of how many 0 > digits the value has, not whether the value as a whole is zero. That > means you will get the wrong result here. > OK! I'll fix that logic. > > + cr = (sgnb == NATIONAL_PLUS) ? 1 << CRF_GT : 1 << CRF_LT; > > + } else { > > + cr = 1 << CRF_EQ; > > + } > > + > > + if (unlikely(invalid)) { > > + cr = 1 << CRF_SO; > > + } > > + > > + *r = ret; > > + > > + return cr; > > +} > > + > > void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) > > { > > int i; > > diff --git a/target-ppc/translate/vmx-impl.inc.c > > b/target-ppc/translate/vmx-impl.inc.c > > index c8998f3..2abdcac 100644 > > --- a/target-ppc/translate/vmx-impl.inc.c > > +++ b/target-ppc/translate/vmx-impl.inc.c > > @@ -871,8 +871,81 @@ static void gen_##op(DisasContext *ctx) \ > > tcg_temp_free_i32(ps); \ > > } > > > > +#define GEN_BCD2(op) \ > > +static void gen_##op(DisasContext *ctx) \ > > +{ \ > > + TCGv_ptr rd, rb; \ > > + TCGv_i32 ps; \ > > + \ > > + if (unlikely(!ctx->altivec_enabled)) { \ > > + gen_exception(ctx, POWERPC_EXCP_VPU); \ > > + return; \ > > + } \ > > + \ > > + rb = gen_avr_ptr(rB(ctx->opcode)); \ > > + rd = gen_avr_ptr(rD(ctx->opcode)); \ > > + \ > > + ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \ > > + \ > > + gen_helper_##op(cpu_crf[6], rd, rb, ps); \ > > + \ > > + tcg_temp_free_ptr(rb); \ > > + tcg_temp_free_ptr(rd); \ > > + tcg_temp_free_i32(ps); \ > > +} > > + > > GEN_BCD(bcdadd) > > GEN_BCD(bcdsub) > > +GEN_BCD2(bcdcfn) > > + > > +static void gen_xpnd04_1(DisasContext *ctx) > > +{ > > + switch (opc4(ctx->opcode)) { > > + case 0: > > + break; /* bcdctsq. */ > > + case 2: > > + break; /* bcdcfsq. */ > > + case 4: > > + break; /* bcdctz. */ > > + case 5: > > + break; /* bcdctn. */ > > + case 6: > > + break; /* bcdcfz. */ > > + case 7: > > + gen_bcdcfn(ctx); > > + break; > > + case 31: > > + break; /* bcdsetsgn. */ > > + default: > > + break; > > + } > > +} > > + > > +static void gen_xpnd04_2(DisasContext *ctx) > > +{ > > + switch (opc4(ctx->opcode)) { > > + case 0: > > + break; /* bcdctsq. */ > > + case 2: > > + break; /* bcdcfsq. */ > > + case 4: > > + break; /* bcdctz. */ > > + case 6: > > + break; /* bcdcfz. */ > > + case 7: > > + gen_bcdcfn(ctx); > > + break; > > + case 31: > > + break; /* bcdsetsgn. */ > > + default: > > + break; > > + } > > +} > > I thougt the main opcode table now had support for opc4, so you > shouldn't need these special expanders. This is the hardest one. :) For all the functions above we have opcode collisions today. Let's take the bcdcfn case, its opcodes are: 000100 vrt:5 00111 vrb:5 11110 00000 1, when ps=1 000100 vrt:5 00111 vrb:5 10110 00000 1, when ps=0 Registering the opcode in the opcode table at translate_init.c, the following sequence is assumed: 0x4 -> 0x0 -> 0x1e (ps=1) or 0x16 (ps=0) -> 0x7 However, 0x4, 0x0, 0x1e already maps to a direct opcode: vsubsws 000100 vrt:5 vra:5 vrb:5 11110 00000 0 as well as 0x4, 0x0, 0x16, that maps to: vsubcuw 000100 vrt:5 vra:5 vrb:5 10110 00000 0 I this case, gen_xpnd04_1 handles ps=0 collisions and gen_xpnd04_2 does the same when ps=1. So Nikunj gave me this great idea to handle these exceptional cases. > > > +GEN_VXFORM_DUAL(vsubcuw, PPC_ALTIVEC, PPC_NONE, \ > > + xpnd04_1, PPC_NONE, PPC2_ISA300) > > +GEN_VXFORM_DUAL(vsubsws, PPC_ALTIVEC, PPC_NONE, \ > > + xpnd04_2, PPC_NONE, PPC2_ISA300) > > > > GEN_VXFORM_DUAL(vsububm, PPC_ALTIVEC, PPC_NONE, \ > > bcdadd, PPC_NONE, PPC2_ALTIVEC_207) > > @@ -949,3 +1022,5 @@ GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE, > > #undef GEN_VXFORM_NOA > > #undef GEN_VXFORM_UIMM > > #undef GEN_VAFORM_PAIRED > > + > > +#undef GEN_BCD2 > > diff --git a/target-ppc/translate/vmx-ops.inc.c > > b/target-ppc/translate/vmx-ops.inc.c > > index 68cba3e..7a5fec6 100644 > > --- a/target-ppc/translate/vmx-ops.inc.c > > +++ b/target-ppc/translate/vmx-ops.inc.c > > @@ -122,7 +122,7 @@ GEN_VXFORM_300(vslv, 2, 29), > > GEN_VXFORM(vslo, 6, 16), > > GEN_VXFORM(vsro, 6, 17), > > GEN_VXFORM(vaddcuw, 0, 6), > > -GEN_VXFORM(vsubcuw, 0, 22), > > +GEN_VXFORM_DUAL(vsubcuw, xpnd04_1, 0, 22, PPC_ALTIVEC, PPC_NONE), > > GEN_VXFORM(vaddubs, 0, 8), > > GEN_VXFORM(vadduhs, 0, 9), > > GEN_VXFORM(vadduws, 0, 10), > > @@ -134,7 +134,7 @@ GEN_VXFORM_DUAL(vsubuhs, bcdsub, 0, 25, PPC_ALTIVEC, > > PPC_NONE), > > GEN_VXFORM(vsubuws, 0, 26), > > GEN_VXFORM(vsubsbs, 0, 28), > > GEN_VXFORM(vsubshs, 0, 29), > > -GEN_VXFORM(vsubsws, 0, 30), > > +GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE), > > GEN_VXFORM_207(vadduqm, 0, 4), > > GEN_VXFORM_207(vaddcuq, 0, 5), > > GEN_VXFORM_DUAL(vaddeuqm, vaddecuq, 30, 0xFF, PPC_NONE, PPC2_ALTIVEC_207), > > -- > David Gibson | I'll have my music baroque, and my code > david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ > _other_ > | _way_ _around_! > http://www.ozlabs.org/~dgibson