Hi Jakub, On 03 May 20:57, Jakub Jelinek wrote: > Hi! > > This patch improves code generation e.g. on the first attached testcase > and allows accepting the second one. > > I've noticed we don't allow TFmode or V1TImode in xmm16+ regs at all, > while they are allowed in xmm0-xmm15, so IMHO should be ok even with > AVX512VL. > > Wonder if it wouldn't be better to add a new constraint that would act > like v constraint for TARGET_AVX512VL and like x constraint otherwise, > that might greatly simplify the i386.md changes in this patch. Good idea, I thought about that myself. IMHO this might be a follow-up.
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk, > or with some changes? Haven't figured out how to test the *andnot* > and *<logic>* patterns though. Are you going to commit testcases? Yeah, tests for FP *logic* look odd, so I am OK for not having them. -- Thanks, K > 2016-05-03 Jakub Jelinek <ja...@redhat.com> > > * config/i386/i386.h (VALID_AVX512VL_128_REG_MODE): Allow > TFmode and V1TImode in xmm16+ registers for TARGET_AVX512VL. > * config/i386/i386.md (avx512fvecmode): New mode attr. > (*pushtf): Use v constraint instead of x. > (*movtf_internal): Likewise. For TARGET_AVX512VL and > xmm16+ registers, use vmovdqu64 or vmovdqa64 instructions. > (*absneg<mode>2): Add avx512vl alternatives. > (*absnegtf2_sse): Likewise. > (copysign<mode>3_const, copysign<mode>3_var): Likewise. > * config/i386/sse.md (*andnot<mode>3): Add avx512vl and > avx512f alternatives. > (*andnottf3, *<code><mode>3, *<code>tf3): Likewise. > > --- gcc/config/i386/i386.h.jj 2016-03-30 16:00:17.000000000 +0200 > +++ gcc/config/i386/i386.h 2016-05-03 15:55:46.656342870 +0200 > @@ -1126,7 +1126,8 @@ extern const char *host_detect_local_cpu > > #define VALID_AVX512VL_128_REG_MODE(MODE) \ > ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \ > - || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode) > + || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \ > + || (MODE) == TFmode || (MODE) == V1TImode) > > #define VALID_SSE2_REG_MODE(MODE) \ > ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ > --- gcc/config/i386/i386.md.jj 2016-05-03 14:16:14.000000000 +0200 > +++ gcc/config/i386/i386.md 2016-05-03 17:13:46.643545826 +0200 > @@ -1165,6 +1165,10 @@ (define_mode_attr ssevecmode > (define_mode_attr ssevecmodelower > [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")]) > > +;; AVX512F vector mode corresponding to a scalar mode > +(define_mode_attr avx512fvecmode > + [(QI "V64QI") (HI "V32HI") (SI 
"V16SI") (DI "V8DI") (SF "V16SF") (DF > "V8DF")]) > + > ;; Instruction suffix for REX 64bit operators. > (define_mode_attr rex64suffix [(SI "") (DI "{q}")]) > > @@ -2928,7 +2932,7 @@ (define_insn "*insvqi" > > (define_insn "*pushtf" > [(set (match_operand:TF 0 "push_operand" "=<,<") > - (match_operand:TF 1 "general_no_elim_operand" "x,*roF"))] > + (match_operand:TF 1 "general_no_elim_operand" "v,*roF"))] > "TARGET_64BIT || TARGET_SSE" > { > /* This insn should be already split before reg-stack. */ > @@ -3107,8 +3111,8 @@ (define_expand "mov<mode>" > "ix86_expand_move (<MODE>mode, operands); DONE;") > > (define_insn "*movtf_internal" > - [(set (match_operand:TF 0 "nonimmediate_operand" "=x,x ,m,?*r ,!o") > - (match_operand:TF 1 "general_operand" "C ,xm,x,*roF,*rC"))] > + [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o") > + (match_operand:TF 1 "general_operand" "C ,vm,v,*roF,*rC"))] > "(TARGET_64BIT || TARGET_SSE) > && !(MEM_P (operands[0]) && MEM_P (operands[1])) > && (!can_create_pseudo_p () > @@ -3133,6 +3137,10 @@ (define_insn "*movtf_internal" > { > if (get_attr_mode (insn) == MODE_V4SF) > return "%vmovups\t{%1, %0|%0, %1}"; > + else if (TARGET_AVX512VL > + && (EXT_REX_SSE_REG_P (operands[0]) > + || EXT_REX_SSE_REG_P (operands[1]))) > + return "vmovdqu64\t{%1, %0|%0, %1}"; > else > return "%vmovdqu\t{%1, %0|%0, %1}"; > } > @@ -3140,6 +3148,10 @@ (define_insn "*movtf_internal" > { > if (get_attr_mode (insn) == MODE_V4SF) > return "%vmovaps\t{%1, %0|%0, %1}"; > + else if (TARGET_AVX512VL > + && (EXT_REX_SSE_REG_P (operands[0]) > + || EXT_REX_SSE_REG_P (operands[1]))) > + return "vmovdqa64\t{%1, %0|%0, %1}"; > else > return "%vmovdqa\t{%1, %0|%0, %1}"; > } > @@ -9253,10 +9265,10 @@ (define_expand "<code><mode>2" > "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;") > > (define_insn "*absneg<mode>2" > - [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r") > + [(set (match_operand:MODEF 0 "register_operand" 
"=x,x,v,v,f,!r") > (match_operator:MODEF 3 "absneg_operator" > - [(match_operand:MODEF 1 "register_operand" "0,x,0,0")])) > - (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X")) > + [(match_operand:MODEF 1 "register_operand" "0,x,0,v,0,0")])) > + (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" > "xm,0,vm,0,X,X")) > (clobber (reg:CC FLAGS_REG))] > "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) > || TARGET_80387" > @@ -9265,11 +9277,14 @@ (define_insn "*absneg<mode>2" > (if_then_else > (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH")) > (if_then_else > - (eq_attr "alternative" "2") > + (eq_attr "alternative" "4") > (symbol_ref "TARGET_MIX_SSE_I387") > - (symbol_ref "true")) > + (if_then_else > + (eq_attr "alternative" "2,3") > + (symbol_ref "TARGET_AVX512VL != 0") > + (symbol_ref "true"))) > (if_then_else > - (eq_attr "alternative" "2,3") > + (eq_attr "alternative" "4,5") > (symbol_ref "true") > (symbol_ref "false"))))]) > > @@ -9289,13 +9304,14 @@ (define_expand "<code>tf2" > "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;") > > (define_insn "*absnegtf2_sse" > - [(set (match_operand:TF 0 "register_operand" "=x,x") > + [(set (match_operand:TF 0 "register_operand" "=x,x,v,v") > (match_operator:TF 3 "absneg_operator" > - [(match_operand:TF 1 "register_operand" "0,x")])) > - (use (match_operand:TF 2 "nonimmediate_operand" "xm,0")) > + [(match_operand:TF 1 "register_operand" "0,x,0,v")])) > + (use (match_operand:TF 2 "nonimmediate_operand" "xm,0,vm,0")) > (clobber (reg:CC FLAGS_REG))] > "TARGET_SSE" > - "#") > + "#" > + [(set_attr "isa" "*,*,avx512vl,avx512vl")]) > > ;; Splitters for fp abs and neg. 
> > @@ -9473,31 +9489,35 @@ (define_expand "copysign<mode>3" > "ix86_expand_copysign (operands); DONE;") > > (define_insn_and_split "copysign<mode>3_const" > - [(set (match_operand:CSGNMODE 0 "register_operand" "=x") > + [(set (match_operand:CSGNMODE 0 "register_operand" "=x,v") > (unspec:CSGNMODE > - [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC") > - (match_operand:CSGNMODE 2 "register_operand" "0") > - (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")] > + [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC,vmC") > + (match_operand:CSGNMODE 2 "register_operand" "0,0") > + (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm,vm")] > UNSPEC_COPYSIGN))] > "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) > || (TARGET_SSE && (<MODE>mode == TFmode))" > "#" > "&& reload_completed" > [(const_int 0)] > - "ix86_split_copysign_const (operands); DONE;") > + "ix86_split_copysign_const (operands); DONE;" > + [(set_attr "isa" "*,avx512vl")]) > > (define_insn "copysign<mode>3_var" > - [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x") > + [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x,v,v,v,v,v") > (unspec:CSGNMODE > - [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x") > - (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x") > - (match_operand:<CSGNVMODE> 4 "nonimmediate_operand" "X,xm,xm,0,0") > - (match_operand:<CSGNVMODE> 5 "nonimmediate_operand" "0,xm,1,xm,1")] > + [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x,v,0,0,v,v") > + (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x,1,1,v,1,v") > + (match_operand:<CSGNVMODE> 4 > + "nonimmediate_operand" "X,xm,xm,0,0,X,vm,vm,0,0") > + (match_operand:<CSGNVMODE> 5 > + "nonimmediate_operand" "0,xm,1,xm,1,0,vm,1,vm,1")] > UNSPEC_COPYSIGN)) > - (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))] > + (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x,v,v,v,v,v"))] > "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) > || (TARGET_SSE && (<MODE>mode 
== TFmode))" > - "#") > + "#" > + [(set_attr "isa" > "*,*,*,*,*,avx512vl,avx512vl,avx512vl,avx512vl,avx512vl")]) > > (define_split > [(set (match_operand:CSGNMODE 0 "register_operand") > --- gcc/config/i386/sse.md.jj 2016-05-03 13:34:09.946986488 +0200 > +++ gcc/config/i386/sse.md 2016-05-03 17:38:02.486935094 +0200 > @@ -3000,11 +3013,11 @@ (define_expand "copysign<mode>3" > ;; because the native instructions read the full 128-bits. > > (define_insn "*andnot<mode>3" > - [(set (match_operand:MODEF 0 "register_operand" "=x,x") > + [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v") > (and:MODEF > (not:MODEF > - (match_operand:MODEF 1 "register_operand" "0,x")) > - (match_operand:MODEF 2 "register_operand" "x,x")))] > + (match_operand:MODEF 1 "register_operand" "0,x,v,v")) > + (match_operand:MODEF 2 "register_operand" "x,x,v,v")))] > "SSE_FLOAT_MODE_P (<MODE>mode)" > { > static char buf[32]; > @@ -3020,6 +3033,24 @@ (define_insn "*andnot<mode>3" > case 1: > ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; > break; > + case 2: > + if (TARGET_AVX512DQ) > + ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; > + else > + { > + suffix = <MODE>mode == DFmode ? "q" : "d"; > + ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; > + } > + break; > + case 3: > + if (TARGET_AVX512DQ) > + ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; > + else > + { > + suffix = <MODE>mode == DFmode ? 
"q" : "d"; > + ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; > + } > + break; > default: > gcc_unreachable (); > } > @@ -3027,11 +3058,19 @@ (define_insn "*andnot<mode>3" > snprintf (buf, sizeof (buf), ops, suffix); > return buf; > } > - [(set_attr "isa" "noavx,avx") > + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") > (set_attr "type" "sselog") > - (set_attr "prefix" "orig,vex") > + (set_attr "prefix" "orig,vex,evex,evex") > (set (attr "mode") > - (cond [(and (match_test "<MODE_SIZE> == 16") > + (cond [(eq_attr "alternative" "2") > + (if_then_else (match_test "TARGET_AVX512DQ") > + (const_string "<ssevecmode>") > + (const_string "TI")) > + (eq_attr "alternative" "3") > + (if_then_else (match_test "TARGET_AVX512DQ") > + (const_string "<avx512fvecmode>") > + (const_string "XI")) > + (and (match_test "<MODE_SIZE> == 16") > (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) > (const_string "V4SF") > (match_test "TARGET_AVX") > @@ -3042,16 +3081,17 @@ (define_insn "*andnot<mode>3" > (const_string "<ssevecmode>")))]) > > (define_insn "*andnottf3" > - [(set (match_operand:TF 0 "register_operand" "=x,x") > + [(set (match_operand:TF 0 "register_operand" "=x,x,v,v") > (and:TF > - (not:TF (match_operand:TF 1 "register_operand" "0,x")) > - (match_operand:TF 2 "vector_operand" "xBm,xm")))] > + (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v")) > + (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))] > "TARGET_SSE" > { > static char buf[32]; > const char *ops; > const char *tmp > - = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn"; > + = (which_alternative >= 2 ? "pandnq" > + : get_attr_mode (insn) == MODE_V4SF ? 
"andnps" : "pandn"); > > switch (which_alternative) > { > @@ -3059,8 +3099,12 @@ (define_insn "*andnottf3" > ops = "%s\t{%%2, %%0|%%0, %%2}"; > break; > case 1: > + case 2: > ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; > break; > + case 3: > + ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; > + break; > default: > gcc_unreachable (); > } > @@ -3068,7 +3112,7 @@ (define_insn "*andnottf3" > snprintf (buf, sizeof (buf), ops, tmp); > return buf; > } > - [(set_attr "isa" "noavx,avx") > + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") > (set_attr "type" "sselog") > (set (attr "prefix_data16") > (if_then_else > @@ -3076,9 +3120,13 @@ (define_insn "*andnottf3" > (eq_attr "mode" "TI")) > (const_string "1") > (const_string "*"))) > - (set_attr "prefix" "orig,vex") > + (set_attr "prefix" "orig,vex,evex,evex") > (set (attr "mode") > - (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") > + (cond [(eq_attr "alternative" "2") > + (const_string "TI") > + (eq_attr "alternative" "3") > + (const_string "XI") > + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") > (const_string "V4SF") > (match_test "TARGET_AVX") > (const_string "TI") > @@ -3089,10 +3137,10 @@ (define_insn "*andnottf3" > (const_string "TI")))]) > > (define_insn "*<code><mode>3" > - [(set (match_operand:MODEF 0 "register_operand" "=x,x") > + [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v") > (any_logic:MODEF > - (match_operand:MODEF 1 "register_operand" "%0,x") > - (match_operand:MODEF 2 "register_operand" "x,x")))] > + (match_operand:MODEF 1 "register_operand" "%0,x,v,v") > + (match_operand:MODEF 2 "register_operand" "x,x,v,v")))] > "SSE_FLOAT_MODE_P (<MODE>mode)" > { > static char buf[32]; > @@ -3105,9 +3153,26 @@ (define_insn "*<code><mode>3" > case 0: > ops = "<logic>%s\t{%%2, %%0|%%0, %%2}"; > break; > + case 2: > + if (!TARGET_AVX512DQ) > + { > + suffix = <MODE>mode == DFmode ? 
"q" : "d"; > + ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; > + break; > + } > + /* FALLTHRU */ > case 1: > ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; > break; > + case 3: > + if (TARGET_AVX512DQ) > + ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; > + else > + { > + suffix = <MODE>mode == DFmode ? "q" : "d"; > + ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; > + } > + break; > default: > gcc_unreachable (); > } > @@ -3115,11 +3180,19 @@ (define_insn "*<code><mode>3" > snprintf (buf, sizeof (buf), ops, suffix); > return buf; > } > - [(set_attr "isa" "noavx,avx") > + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") > (set_attr "type" "sselog") > - (set_attr "prefix" "orig,vex") > + (set_attr "prefix" "orig,vex,evex,evex") > (set (attr "mode") > - (cond [(and (match_test "<MODE_SIZE> == 16") > + (cond [(eq_attr "alternative" "2") > + (if_then_else (match_test "TARGET_AVX512DQ") > + (const_string "<ssevecmode>") > + (const_string "TI")) > + (eq_attr "alternative" "3") > + (if_then_else (match_test "TARGET_AVX512DQ") > + (const_string "<avx512fvecmode>") > + (const_string "XI")) > + (and (match_test "<MODE_SIZE> == 16") > (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) > (const_string "V4SF") > (match_test "TARGET_AVX") > @@ -3138,17 +3211,18 @@ (define_expand "<code>tf3" > "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);") > > (define_insn "*<code>tf3" > - [(set (match_operand:TF 0 "register_operand" "=x,x") > + [(set (match_operand:TF 0 "register_operand" "=x,x,v,v") > (any_logic:TF > - (match_operand:TF 1 "vector_operand" "%0,x") > - (match_operand:TF 2 "vector_operand" "xBm,xm")))] > + (match_operand:TF 1 "vector_operand" "%0,x,v,v") > + (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))] > "TARGET_SSE > && ix86_binary_operator_ok (<CODE>, TFmode, operands)" > { > static char buf[32]; > const char *ops; > const char *tmp > - = (get_attr_mode (insn) == MODE_V4SF) ? 
"<logic>ps" : "p<logic>"; > + = (which_alternative >= 2 ? "p<logic>q" > + : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>"); > > switch (which_alternative) > { > @@ -3156,8 +3230,12 @@ (define_insn "*<code>tf3" > ops = "%s\t{%%2, %%0|%%0, %%2}"; > break; > case 1: > + case 2: > ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; > break; > + case 3: > + ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; > + break; > default: > gcc_unreachable (); > } > @@ -3165,7 +3243,7 @@ (define_insn "*<code>tf3" > snprintf (buf, sizeof (buf), ops, tmp); > return buf; > } > - [(set_attr "isa" "noavx,avx") > + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") > (set_attr "type" "sselog") > (set (attr "prefix_data16") > (if_then_else > @@ -3173,9 +3251,13 @@ (define_insn "*<code>tf3" > (eq_attr "mode" "TI")) > (const_string "1") > (const_string "*"))) > - (set_attr "prefix" "orig,vex") > + (set_attr "prefix" "orig,vex,evex,evex") > (set (attr "mode") > - (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") > + (cond [(eq_attr "alternative" "2") > + (const_string "TI") > + (eq_attr "alternative" "3") > + (const_string "QI") > + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") > (const_string "V4SF") > (match_test "TARGET_AVX") > (const_string "TI") > > Jakub > void > f1 (float x) > { > register float a __asm ("xmm16"); > a = x; > asm volatile ("" : "+v" (a)); > a = __builtin_fabsf (a); > asm volatile ("" : "+v" (a)); > } > > void > f2 (float x, float y) > { > register float a __asm ("xmm16"), b __asm ("xmm17"); > a = x; > b = y; > asm volatile ("" : "+v" (a), "+v" (b)); > a = __builtin_copysignf (a, b); > asm volatile ("" : "+v" (a)); > } > > void > f3 (float x) > { > register float a __asm ("xmm16"); > a = x; > asm volatile ("" : "+v" (a)); > a = -a; > asm volatile ("" : "+v" (a)); > } > > void > f4 (double x) > { > register double a __asm ("xmm16"); > a = x; > asm volatile ("" : "+v" (a)); > a = __builtin_fabs (a); > asm volatile ("" : "+v" (a)); > } > > void > f5 
(double x, double y) > { > register double a __asm ("xmm16"), b __asm ("xmm17"); > a = x; > b = y; > asm volatile ("" : "+v" (a), "+v" (b)); > a = __builtin_copysign (a, b); > asm volatile ("" : "+v" (a)); > } > > void > f6 (double x) > { > register double a __asm ("xmm16"); > a = x; > asm volatile ("" : "+v" (a)); > a = -a; > asm volatile ("" : "+v" (a)); > } > void > f1 (__float128 x) > { > register __float128 a __asm ("xmm16"); > a = x; > asm volatile ("" : "+v" (a)); > a = __builtin_fabsq (a); > asm volatile ("" : "+v" (a)); > } > > void > f2 (__float128 x, __float128 y) > { > register __float128 a __asm ("xmm16"), b __asm ("xmm17"); > a = x; > b = y; > asm volatile ("" : "+v" (a), "+v" (b)); > a = __builtin_copysignq (a, b); > asm volatile ("" : "+v" (a)); > } > > void > f3 (__float128 x) > { > register __float128 a __asm ("xmm16"); > a = x; > asm volatile ("" : "+v" (a)); > a = -a; > asm volatile ("" : "+v" (a)); > } > > __int128_t > f4 (void) > { > register __int128_t a __asm ("xmm16"); > register __int128_t __attribute__((vector_size (16))) b __asm ("xmm17"); > a = 1; > asm volatile ("" : "+v" (a)); > b[0] = a; > asm volatile ("" : "+v" (b)); > return b[0]; > }