On Fri, Aug 10, 2012 at 10:02 PM, Richard Henderson <r...@redhat.com> wrote: > On 2012-08-10 12:59, Uros Bizjak wrote: >> Actually, this is the problem you are trying to solve. The fma4 >> patterns are defined before fma3, so gcc prefers these. > > The Real Problem is that they should not be separate patterns. > They should be a single pattern that selects alternatives via > the enabled isa.
2012-08-11 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.md (isa): Add fma and fma4. (enabled): Handle fma and fma4. * config/i386/sse.md (*fma_fmadd_<mode>): Merge *fma4_fmadd_<mode>. (*fma_fmsub_<mode>): Merge *fma4_fmsub_<mode>. (*fma_fnmadd_<mode>): Merge *fma4_fnmadd_<mode>. (*fma_fnmsub_<mode>): Merge *fma4_fnmsub_<mode>. (*fma_fmaddsub_<mode>): Merge *fma4_fmaddsub_<mode>. (*fma_fmsubadd_<mode>): Merge *fma4_fmsubadd_<mode>. Tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN. I will wait a couple of days before backporting the patches to 4.7, so Ganesh, please test mainline to confirm that everything is OK. BTW: With this patch, we can enable PTA_FMA4 for the bdver2 target. Uros.
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 190301) +++ config/i386/i386.md (working copy) @@ -641,7 +641,8 @@ (define_attr "movu" "0,1" (const_string "0")) ;; Used to control the "enabled" attribute on a per-instruction basis. -(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,avx2,noavx2,bmi2" +(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx, + avx2,noavx2,bmi2,fma,fma4" (const_string "base")) (define_attr "enabled" "" @@ -657,6 +658,9 @@ (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2") (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2") (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2") + (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA") + (eq_attr "isa" "fma4") + (symbol_ref "TARGET_FMA4 && !TARGET_FMA") ] (const_int 1))) Index: config/i386/sse.md =================================================================== --- config/i386/sse.md (revision 190304) +++ config/i386/sse.md (working copy) @@ -1891,21 +1891,6 @@ (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) -;; In order to match (*a * *b) + *c, particularly when vectorizing, allow -;; combine to generate a multiply/add with two memory references. We then -;; split this insn, into loading up the destination register with one of the -;; memory operations. If we don't manage to split the insn, reload will -;; generate the appropriate moves. The reason this is needed, is that combine -;; has already folded one of the memory references into both the multiply and -;; add insns, and it can't generate a new pseudo. I.e.: -;; (set (reg1) (mem (addr1))) -;; (set (reg2) (mult (reg1) (mem (addr2)))) -;; (set (reg3) (plus (reg2) (mem (addr3)))) -;; -;; ??? This is historic, pre-dating the gimple fma transformation. -;; We could now properly represent that only one memory operand is -;; allowed and not be penalized during optimization. 
- ;; The standard names for fma is only available with SSE math enabled. (define_expand "fma<mode>4" [(set (match_operand:FMAMODE 0 "register_operand") @@ -1948,118 +1933,78 @@ (match_operand:FMAMODE 3 "nonimmediate_operand")))] "TARGET_FMA || TARGET_FMA4") -;; FMA3 version - (define_insn "*fma_fmadd_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] - "TARGET_FMA" + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))] + "TARGET_FMA || TARGET_FMA4" "@ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") + vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} + vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fmsub_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] - "TARGET_FMA" + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))] + "TARGET_FMA || TARGET_FMA4" "@ 
vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") + vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} + vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fnmadd_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") (fma:FMAMODE (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] - "TARGET_FMA" + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))] + "TARGET_FMA || TARGET_FMA4" "@ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") + vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} + vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fnmsub_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") (fma:FMAMODE (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") 
(neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] - "TARGET_FMA" + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))] + "TARGET_FMA || TARGET_FMA4" "@ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") + vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} + vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; FMA4 version - -(define_insn "*fma4_fmadd_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") - (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m") - (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))] - "TARGET_FMA4" - "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma4_fmsub_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") - (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m") - (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))] - "TARGET_FMA4" - "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma4_fnmadd_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") - (fma:FMAMODE - (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m") - (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))] - "TARGET_FMA4" - "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - 
-(define_insn "*fma4_fnmsub_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") - (fma:FMAMODE - (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m") - (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))] - "TARGET_FMA4" - "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - ;; FMA parallel floating point multiply addsub and subadd operations. ;; It would be possible to represent these without the UNSPEC as @@ -2080,66 +2025,43 @@ UNSPEC_FMADDSUB))] "TARGET_FMA || TARGET_FMA4") -;; FMA3 version - (define_insn "*fma_fmaddsub_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x,x") + [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")] + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x") + (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m") + (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")] UNSPEC_FMADDSUB))] - "TARGET_FMA" + "TARGET_FMA || TARGET_FMA4" "@ vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") + vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} + vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fma_fmsubadd_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x,x") + [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:VF 2 
"nonimmediate_operand" "xm, x,xm") + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x") + (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m") (neg:VF - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))] + (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))] UNSPEC_FMADDSUB))] - "TARGET_FMA" + "TARGET_FMA || TARGET_FMA4" "@ vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") + vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} + vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; FMA4 version - -(define_insn "*fma4_fmaddsub_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%x,x") - (match_operand:VF 2 "nonimmediate_operand" " x,m") - (match_operand:VF 3 "nonimmediate_operand" "xm,x")] - UNSPEC_FMADDSUB))] - "TARGET_FMA4" - "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma4_fmsubadd_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%x,x") - (match_operand:VF 2 "nonimmediate_operand" " x,m") - (neg:VF - (match_operand:VF 3 "nonimmediate_operand" "xm,x"))] - UNSPEC_FMADDSUB))] - "TARGET_FMA4" - "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - ;; FMA3 floating point scalar intrinsics. These merge result with ;; high-order elements from the destination register.