On Thu, Oct 09, 2014 at 04:15:23PM +0400, Ilya Tocar wrote:
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -21358,32 +21358,169 @@ ix86_expand_int_vcond (rtx operands[])
> return true;
> }
>
> +/* AVX512F does support 64-byte integer vector operations,
> + thus the longest vector we are faced with is V64QImode. */
> +#define MAX_VECT_LEN 64
> +
> +struct expand_vec_perm_d
> +{
> + rtx target, op0, op1;
> + unsigned char perm[MAX_VECT_LEN];
> + enum machine_mode vmode;
> + unsigned char nelt;
> + bool one_operand_p;
> + bool testing_p;
> +};
> +
> static bool
> -ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
> +ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, struct
> expand_vec_perm_d *d)
Too long line, please wrap it.
> {
> - enum machine_mode mode = GET_MODE (op0);
> + enum machine_mode mode = GET_MODE (d ? d->op0 : op0);
> +
> switch (mode)
> {
> + case V8HImode:
> + if (!TARGET_AVX512VL || !TARGET_AVX512BW)
> + return false;
> + break;
> + case V16HImode:
> + if (!TARGET_AVX512VL || !TARGET_AVX512BW)
> + return false;
> + case V32HImode:
> + if (!TARGET_AVX512BW)
> + return false;
> + break;
> + case V4SImode:
> + if (!TARGET_AVX512VL)
> + return false;
> + break;
> + case V8SImode:
> + if (!TARGET_AVX512VL)
> + return false;
> + break;
> + case V16SImode:
> + if (!TARGET_AVX512F)
> + return false;
> + break;
> + case V4SFmode:
> + if (!TARGET_AVX512VL)
> + return false;
> + break;
> + case V8SFmode:
> + if (!TARGET_AVX512VL)
> + return false;
> + break;
> + case V16SFmode:
> + if (!TARGET_AVX512F)
> + return false;
> + break;
> + case V2DImode:
> + if (!TARGET_AVX512VL)
> + return false;
> + break;
> + case V4DImode:
> + if (!TARGET_AVX512VL)
> + return false;
> + break;
> + case V8DImode:
> + if (!TARGET_AVX512F)
> + return false;
> + break;
> + case V2DFmode:
> + if (!TARGET_AVX512VL)
> + return false;
> + break;
> + case V4DFmode:
> + if (!TARGET_AVX512VL)
> + return false;
> + break;
> + case V8DFmode:
> + if (!TARGET_AVX512F)
> + return false;
> + break;
> + default:
> + return false;
> + }
> +
> + /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
> expander,
> + so args are either in d, or in op0, op1 etc. */
> + if (d)
> + {
> + rtx vec[64];
> + target = d->target;
> + op0 = d->op0;
> + op1 = d->op1;
> + for (int i = 0; i < d->nelt; ++i)
> + vec[i] = GEN_INT (d->perm[i]);
> + mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (d->nelt, vec));
Shouldn't the mask use integral vector mode rather than floating?
My strong preference would be:
enum machine_mode maskmode = mode;
rtx (*gen) (rtx, rtx, rtx, rtx);
right below the enum machine_mode mode = GET_MODE (d ? d->op0 : op0);
line and then inside of the first switch just do:
...
case V16SImode:
if (!TARGET_AVX512F)
return false;
gen = gen_avx512f_vpermi2varv16si3;
break;
case V4SFmode:
if (!TARGET_AVX512VL)
return false;
gen = gen_avx512vl_vpermi2varv4sf3;
maskmode = V4SImode;
break;
...
etc., then in the mask = line use:
mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
and finally instead of the second switch do:
emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
return true;
Otherwise, the patch LGTM, but will leave the final approval to Uros.
Jakub