On Fri, Mar 27, 2020 at 12:16 AM Richard Henderson <richard.hender...@linaro.org> wrote: [...] > diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c > index 5d75aed7b7..d7c181ddb8 100644 > --- a/target/arm/sve_helper.c > +++ b/target/arm/sve_helper.c > @@ -681,6 +681,73 @@ DO_ZPZZ_D(sve2_uhsub_zpzz_d, uint64_t, DO_HSUB_D) > #undef DO_ZPZZ > #undef DO_ZPZZ_D > > +/* > + * Three operand expander, operating on element pairs. > + * If the slot I is even, the elements from from VN {I, I+1}. > + * If the slot I is odd, the elements from from VM {I-1, I}. > + */ > +#define DO_ZPZZ_PAIR(NAME, TYPE, H, OP) \ > +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ > +{ \ > + intptr_t i, opr_sz = simd_oprsz(desc); \ > + for (i = 0; i < opr_sz; ) { \ > + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ > + do { \ > + if (pg & 1) { \ > + void *p = (i & 1 ? vm : vn); \ > + TYPE nn = *(TYPE *)(p + H(i & ~1)); \ > + TYPE mm = *(TYPE *)(p + H(i | 1)); \ > + *(TYPE *)(vd + H(i)) = OP(nn, mm); \ > + } \ > + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ > + } while (i & 15); \ > + } \ > +}
The mask should be sizeof(TYPE) rather than 1: i is a byte offset that advances by sizeof(TYPE), so "i & 1", "i & ~1" and "i | 1" do not select the odd/even element of a pair. A temporary should also be used, because vd is also a source. > +/* Similarly, specialized for 64-bit operands. */ > +#define DO_ZPZZ_PAIR_D(NAME, TYPE, OP) \ > +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ > +{ \ > + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ > + TYPE *d = vd, *n = vn, *m = vm; \ > + uint8_t *pg = vg; \ > + for (i = 0; i < opr_sz; i += 1) { \ > + if (pg[H1(i)] & 1) { \ > + TYPE *p = (i & 1 ? m : n) + (i & ~1); \ > + TYPE nn = p[0], mm = p[1]; \ > + d[i] = OP(nn, mm); \ > + } \ > + } \ > +} Here too, a temporary should be used, because vd is also a source. Laurent