Le mar. 1 sept. 2020 00:31, Richard Henderson <richard.hender...@linaro.org> a écrit :
> With larger vector sizes, it turns out oprsz == maxsz, and we only
> need to represent mismatch for oprsz <= 32. We do, however, need
> to represent larger oprsz and do so without reducing SIMD_DATA_BITS.
>
> Reduce the size of the oprsz field and increase the maxsz field.
> Steal the oprsz value of 24 to indicate equality with maxsz.
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> ---
>  include/tcg/tcg-gvec-desc.h | 38 ++++++++++++++++++++++++-------------
>  tcg/tcg-op-gvec.c           | 35 ++++++++++++++++++++++++++--------
>  2 files changed, 52 insertions(+), 21 deletions(-)
>
> diff --git a/include/tcg/tcg-gvec-desc.h b/include/tcg/tcg-gvec-desc.h
> index 0224ac3e78..704bd86454 100644
> --- a/include/tcg/tcg-gvec-desc.h
> +++ b/include/tcg/tcg-gvec-desc.h
> @@ -20,29 +20,41 @@
>  #ifndef TCG_TCG_GVEC_DESC_H
>  #define TCG_TCG_GVEC_DESC_H
>
> -/* ??? These bit widths are set for ARM SVE, maxing out at 256 byte vectors. */
> -#define SIMD_OPRSZ_SHIFT 0
> -#define SIMD_OPRSZ_BITS 5
> +/*
> + * This configuration allows MAXSZ to represent 2048 bytes, and
> + * OPRSZ to match MAXSZ, or represent the smaller values 8, 16, or 32.
> + *
> + * Encode this with:
> + * 0, 1, 3 -> 8, 16, 32
> + * 2 -> maxsz
> + *
> + * This steals the input that would otherwise map to 24 to match maxsz.
>

Nice trick :)

> + */
> +#define SIMD_MAXSZ_SHIFT 0
> +#define SIMD_MAXSZ_BITS 8
>
> -#define SIMD_MAXSZ_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
> -#define SIMD_MAXSZ_BITS 5
> +#define SIMD_OPRSZ_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
> +#define SIMD_OPRSZ_BITS 2
>
> -#define SIMD_DATA_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
> +#define SIMD_DATA_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
>  #define SIMD_DATA_BITS (32 - SIMD_DATA_SHIFT)
>
>  /* Create a descriptor from components. */
>  uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data);
>
> -/* Extract the operation size from a descriptor.
*/
> -static inline intptr_t simd_oprsz(uint32_t desc)
> -{
> -    return (extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS) + 1) * 8;
> -}
> -
>  /* Extract the max vector size from a descriptor. */
>  static inline intptr_t simd_maxsz(uint32_t desc)
>  {
> -    return (extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) + 1) * 8;
> +    return extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) * 8 + 8;
> +}
> +
> +/* Extract the operation size from a descriptor. */
> +static inline intptr_t simd_oprsz(uint32_t desc)
> +{
> +    uint32_t f = extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS);
> +    intptr_t o = f * 8 + 8;
> +    intptr_t m = simd_maxsz(desc);
> +    return f == 2 ? m : o;
>  }
>
>  /* Extract the operation-specific data from a descriptor. */
> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
> index 7ebd9e8298..ddbe06b71a 100644
> --- a/tcg/tcg-op-gvec.c
> +++ b/tcg/tcg-op-gvec.c
> @@ -37,11 +37,21 @@ static const TCGOpcode vecop_list_empty[1] = { 0 };
>     of the operand offsets so that we can check them all at once. */
>  static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
>  {
> -    uint32_t opr_align = oprsz >= 16 ? 15 : 7;
> -    uint32_t max_align = maxsz >= 16 || oprsz >= 16 ? 15 : 7;
> -    tcg_debug_assert(oprsz > 0);
> -    tcg_debug_assert(oprsz <= maxsz);
> -    tcg_debug_assert((oprsz & opr_align) == 0);
> +    uint32_t max_align;
> +
> +    switch (oprsz) {
> +    case 8:
> +    case 16:
> +    case 32:
> +        tcg_debug_assert(oprsz <= maxsz);
> +        break;
> +    default:
> +        tcg_debug_assert(oprsz == maxsz);
> +        break;
> +    }
> +    tcg_debug_assert(maxsz <= (8 << SIMD_MAXSZ_BITS));
> +
> +    max_align = maxsz >= 16 ?
15 : 7;
>      tcg_debug_assert((maxsz & max_align) == 0);
>      tcg_debug_assert((ofs & max_align) == 0);
>  }
> @@ -77,12 +87,21 @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
>  {
>      uint32_t desc = 0;
>
> -    assert(oprsz % 8 == 0 && oprsz <= (8 << SIMD_OPRSZ_BITS));
> -    assert(maxsz % 8 == 0 && maxsz <= (8 << SIMD_MAXSZ_BITS));
> -    assert(data == sextract32(data, 0, SIMD_DATA_BITS));
> +    check_size_align(oprsz, maxsz, 0);
> +    tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS));
>
>      oprsz = (oprsz / 8) - 1;
>      maxsz = (maxsz / 8) - 1;
> +
> +    /*
> +     * We have just asserted in check_size_align that either
> +     * oprsz is {8,16,32} or matches maxsz. Encode the final
> +     * case with '2', as that would otherwise map to 24.
> +     */
> +    if (oprsz == maxsz) {
> +        oprsz = 2;
> +    }
>

Reviewed-by: Philippe Mathieu-Daudé <f4...@amsat.org>

> +
>      desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
>      desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
>      desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);
> --
> 2.25.1
>
>
>