On Thu, Oct 09, 2014 at 04:15:23PM +0400, Ilya Tocar wrote: > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -21358,32 +21358,169 @@ ix86_expand_int_vcond (rtx operands[]) > return true; > } > > +/* AVX512F does support 64-byte integer vector operations, > + thus the longest vector we are faced with is V64QImode. */ > +#define MAX_VECT_LEN 64 > + > +struct expand_vec_perm_d > +{ > + rtx target, op0, op1; > + unsigned char perm[MAX_VECT_LEN]; > + enum machine_mode vmode; > + unsigned char nelt; > + bool one_operand_p; > + bool testing_p; > +}; > + > static bool > -ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1) > +ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, struct > expand_vec_perm_d *d)
Too long line, please wrap it. > { > - enum machine_mode mode = GET_MODE (op0); > + enum machine_mode mode = GET_MODE (d ? d->op0 : op0); > + > switch (mode) > { > + case V8HImode: > + if (!TARGET_AVX512VL || !TARGET_AVX512BW) > + return false; > + break; > + case V16HImode: > + if (!TARGET_AVX512VL || !TARGET_AVX512BW) > + return false; > + case V32HImode: > + if (!TARGET_AVX512BW) > + return false; > + break; > + case V4SImode: > + if (!TARGET_AVX512VL) > + return false; > + break; > + case V8SImode: > + if (!TARGET_AVX512VL) > + return false; > + break; > + case V16SImode: > + if (!TARGET_AVX512F) > + return false; > + break; > + case V4SFmode: > + if (!TARGET_AVX512VL) > + return false; > + break; > + case V8SFmode: > + if (!TARGET_AVX512VL) > + return false; > + break; > + case V16SFmode: > + if (!TARGET_AVX512F) > + return false; > + break; > + case V2DImode: > + if (!TARGET_AVX512VL) > + return false; > + break; > + case V4DImode: > + if (!TARGET_AVX512VL) > + return false; > + break; > + case V8DImode: > + if (!TARGET_AVX512F) > + return false; > + break; > + case V2DFmode: > + if (!TARGET_AVX512VL) > + return false; > + break; > + case V4DFmode: > + if (!TARGET_AVX512VL) > + return false; > + break; > + case V8DFmode: > + if (!TARGET_AVX512F) > + return false; > + break; > + default: > + return false; > + } > + > + /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const > expander, > + so args are either in d, or in op0, op1 etc. */ > + if (d) > + { > + rtx vec[64]; > + target = d->target; > + op0 = d->op0; > + op1 = d->op1; > + for (int i = 0; i < d->nelt; ++i) > + vec[i] = GEN_INT (d->perm[i]); > + mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (d->nelt, vec)); Shouldn't the mask use integral vector mode rather than floating? My strong preference would be: enum machine_mode maskmode = mode; rtx (*gen) (rtx, rtx, rtx, rtx); right below the enum machine_mode mode = GET_MODE (d ? d->op0 : op0); line and then inside of the first switch just do: ... case V16SImode: if (!TARGET_AVX512F) return false; gen = gen_avx512f_vpermi2varv16si3; break; case V4SFmode: if (!TARGET_AVX512VL) return false; gen = gen_avx512vl_vpermi2varv4sf3; maskmode = V4SImode; break; ... etc., then in the mask = line use: mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); and finally instead of the second switch do: emit_insn (gen (target, op0, force_reg (maskmode, mask), op1)); return true; Otherwise, the patch LGTM, but will leave the final approval to Uros. Jakub