Hello!

The attached patch implements splitting of double-mode inter-unit moves into
movd/pinsrd and movd/pextrd sequences (and their 64-bit "q" variants for
TImode moves) on SSE4.1 targets. This way, partial memory stalls are avoided.

2017-05-30  Uros Bizjak  <ubiz...@gmail.com>

    PR target/80833
    * config/i386/constraints.md (Yd): New constraint.
    (Ye): Ditto.
    * config/i386/i386.md (*movti_internal): Add (?r, Ye)
    and (?Yd, r) alternatives.  Update insn attributes.
    * config/i386/i386.md (*movdi_internal): Add (?r, *Ye)
    and (?*Yd, r) alternatives.  Update insn attributes.
    (double-mode inter-unit splitters): Add new GR<->XMM splitters.

testsuite/ChangeLog:

2017-05-30  Uros Bizjak  <ubiz...@gmail.com>

    PR target/80833
    * gcc.target/i386/pr80833-1.c: New test.
    * gcc.target/i386/pr80833-2.c: Ditto.

The patch was bootstrapped and regression-tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/constraints.md
===================================================================
--- config/i386/constraints.md  (revision 248687)
+++ config/i386/constraints.md  (working copy)
@@ -102,18 +102,24 @@
 ;;  c  SSE inter-unit conversions enabled
 ;;  i  SSE2 inter-unit moves to SSE register enabled
 ;;  j  SSE2 inter-unit moves from SSE register enabled
+;;  d  any EVEX encodable SSE register for AVX512BW target or any SSE register
+;;     for SSE4_1 target, when inter-unit moves to SSE register are enabled
+;;  e  any EVEX encodable SSE register for AVX512BW target or any SSE register
+;;     for SSE4_1 target, when inter-unit moves from SSE register are enabled
 ;;  m  MMX inter-unit moves to MMX register enabled
 ;;  n  MMX inter-unit moves from MMX register enabled
+;;  p  Integer register when TARGET_PARTIAL_REG_STALL is disabled
 ;;  a  Integer register when zero extensions with AND are disabled
 ;;  b  Any register that can be used as the GOT base when calling
 ;;     ___tls_get_addr: that is, any general register except EAX
 ;;     and ESP, for -fno-plt if linker supports it.  Otherwise,
 ;;     EBX.
-;;  p  Integer register when TARGET_PARTIAL_REG_STALL is disabled
 ;;  f  x87 register when 80387 floating point arithmetic is enabled
 ;;  r  SSE regs not requiring REX prefix when prefixes avoidance is enabled
 ;;     and all SSE regs otherwise
-;;  h   EVEX encodable SSE register with number factor of four
+;;  v  any EVEX encodable SSE register for AVX512VL target,
+;;     otherwise any SSE register
+;;  h  EVEX encodable SSE register with number factor of four
 
 (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
  "First SSE register (@code{%xmm0}).")
@@ -130,6 +136,22 @@
  "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC ? ALL_SSE_REGS : NO_REGS"
  "@internal Any SSE register, when SSE2 and inter-unit moves from vector registers are enabled.")
 
+(define_register_constraint "Yd"
+ "TARGET_INTER_UNIT_MOVES_TO_VEC
+  ? (TARGET_AVX512BW
+     ? ALL_SSE_REGS
+     : (TARGET_SSE4_1 ? SSE_REGS : NO_REGS))
+  : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW target or any SSE register for SSE4_1 target, when inter-unit moves to vector registers are enabled.")
+
+(define_register_constraint "Ye"
+ "TARGET_INTER_UNIT_MOVES_FROM_VEC
+  ? (TARGET_AVX512BW
+     ? ALL_SSE_REGS
+     : (TARGET_SSE4_1 ? SSE_REGS : NO_REGS))
+  : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW target or any SSE register for SSE4_1 target, when inter-unit moves from vector registers are enabled.")
+
 (define_register_constraint "Ym"
  "TARGET_MMX && TARGET_INTER_UNIT_MOVES_TO_VEC ? MMX_REGS : NO_REGS"
  "@internal Any MMX register, when inter-unit moves to vector registers are enabled.")
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 248687)
+++ config/i386/i386.md (working copy)
@@ -2074,8 +2074,8 @@
              (const_string "OI")))])
 
 (define_insn "*movti_internal"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m")
-       (match_operand:TI 1 "general_operand"      "riFo,re,C,BC,vm,v"))]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd")
+       (match_operand:TI 1 "general_operand"      "riFo,re,C,BC,vm,v,Ye,r"))]
   "(TARGET_64BIT
     && !(MEM_P (operands[0]) && MEM_P (operands[1])))
    || (TARGET_SSE
@@ -2118,8 +2118,20 @@
       gcc_unreachable ();
     }
 }
-  [(set_attr "isa" "x64,x64,*,sse2,*,*")
-   (set_attr "type" "multi,multi,sselog1,sselog1,ssemov,ssemov")
+  [(set (attr "isa")
+     (cond [(eq_attr "alternative" "0,1,6,7")
+             (const_string "x64")
+           (eq_attr "alternative" "3")
+             (const_string "sse2")
+          ]
+          (const_string "*")))
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "0,1,6,7")
+             (const_string "multi")
+           (eq_attr "alternative" "2,3")
+             (const_string "sselog1")
+          ]
+          (const_string "ssemov")))
    (set (attr "prefix")
      (if_then_else (eq_attr "type" "sselog1,ssemov")
        (const_string "maybe_vex")
@@ -2145,11 +2157,29 @@
               ]
               (const_string "TI")))])
 
+(define_split
+  [(set (match_operand:TI 0 "sse_reg_operand")
+        (match_operand:TI 1 "general_reg_operand"))]
+  "TARGET_64BIT && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC
+   && reload_completed"
+  [(set (match_dup 2)
+       (vec_merge:V2DI
+         (vec_duplicate:V2DI (match_dup 3))
+         (match_dup 2)
+         (const_int 2)))]
+{
+  operands[2] = lowpart_subreg (V2DImode, operands[0], TImode);
+  operands[3] = gen_highpart (DImode, operands[1]);
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]),
+                 gen_lowpart (DImode, operands[1]));
+})
+
 (define_insn "*movdi_internal"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,m,?r ,?*Yi,?*Ym,?*Yi,*k,*k ,*r,*m")
+    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,m,?r ,?*Yd,?r ,?*Yi,?*Ym,?*Yi,*k,*k ,*r,*m")
        (match_operand:DI 1 "general_operand"
-    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*Yn,r   ,C ,*v,m ,*v,v,*Yj,r   ,*Yj ,*Yn ,*r,*km,*k,*k"))]
+    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*Yn,r   ,C ,*v,m ,*v,v,*Ye,r   ,*Yj,r   ,*Yj ,*Yn ,*r,*km,*k,*k"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -2222,14 +2252,14 @@
     }
 }
   [(set (attr "isa")
-     (cond [(eq_attr "alternative" "0,1")
+     (cond [(eq_attr "alternative" "0,1,17,18")
              (const_string "nox64")
-           (eq_attr "alternative" "2,3,4,5,10,11,17,18,21,23")
+           (eq_attr "alternative" "2,3,4,5,10,11,19,20,23,25")
              (const_string "x64")
           ]
           (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "0,1")
+     (cond [(eq_attr "alternative" "0,1,17,18")
              (const_string "multi")
            (eq_attr "alternative" "6")
              (const_string "mmx")
@@ -2237,11 +2267,11 @@
              (const_string "mmxmov")
            (eq_attr "alternative" "12")
              (const_string "sselog1")
-           (eq_attr "alternative" "13,14,15,16,17,18")
+           (eq_attr "alternative" "13,14,15,16,19,20")
              (const_string "ssemov")
-           (eq_attr "alternative" "19,20")
+           (eq_attr "alternative" "21,22")
              (const_string "ssecvt")
-           (eq_attr "alternative" "21,22,23,24")
+           (eq_attr "alternative" "23,24,25,26")
              (const_string "mskmov")
            (and (match_operand 0 "register_operand")
                 (match_operand 1 "pic_32bit_operand"))
@@ -2260,7 +2290,7 @@
        (const_string "*")))
    (set (attr "prefix_rex")
      (if_then_else
-       (eq_attr "alternative" "10,11,17,18")
+       (eq_attr "alternative" "10,11,19,20")
        (const_string "1")
        (const_string "*")))
    (set (attr "prefix")
@@ -2308,6 +2338,23 @@
           (const_string "*")))])
 
 (define_split
+  [(set (match_operand:<DWI> 0 "general_reg_operand")
+        (match_operand:<DWI> 1 "sse_reg_operand"))]
+  "TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_FROM_VEC
+   && reload_completed"
+  [(set (match_dup 2)
+       (vec_select:DWIH
+         (match_dup 3)
+         (parallel [(const_int 1)])))]
+{
+  operands[2] = gen_highpart (<MODE>mode, operands[0]);
+  operands[3] = lowpart_subreg (<ssevecmode>mode, operands[1], <DWI>mode);
+
+  emit_move_insn (gen_lowpart (<MODE>mode, operands[0]),
+                 gen_lowpart (<MODE>mode, operands[1]));
+})
+
+(define_split
   [(set (match_operand:DWI 0 "nonimmediate_gr_operand")
         (match_operand:DWI 1 "general_gr_operand"))]
   "reload_completed"
@@ -2314,6 +2361,24 @@
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
+(define_split
+  [(set (match_operand:DI 0 "sse_reg_operand")
+        (match_operand:DI 1 "general_reg_operand"))]
+  "!TARGET_64BIT && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC
+   && reload_completed"
+  [(set (match_dup 2)
+       (vec_merge:V4SI
+         (vec_duplicate:V4SI (match_dup 3))
+         (match_dup 2)
+         (const_int 2)))]
+{
+  operands[2] = lowpart_subreg (V4SImode, operands[0], DImode);
+  operands[3] = gen_highpart (SImode, operands[1]);
+
+  emit_move_insn (gen_lowpart (SImode, operands[0]),
+                 gen_lowpart (SImode, operands[1]));
+})
+
 (define_insn "*movsi_internal"
   [(set (match_operand:SI 0 "nonimmediate_operand"
     "=r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?*Yi,*k,*k ,*rm")
Index: testsuite/gcc.target/i386/pr80833-1.c
===================================================================
--- testsuite/gcc.target/i386/pr80833-1.c       (nonexistent)
+++ testsuite/gcc.target/i386/pr80833-1.c       (working copy)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1 -mtune=intel -mregparm=2" } */
+/* { dg-require-effective-target ia32 } */
+
+long long test (long long a)
+{
+  asm ("" : "+x" (a));
+  return a;
+}
+
+/* { dg-final { scan-assembler "pinsrd" } } */
+/* { dg-final { scan-assembler "pextrd" } } */
Index: testsuite/gcc.target/i386/pr80833-2.c
===================================================================
--- testsuite/gcc.target/i386/pr80833-2.c       (nonexistent)
+++ testsuite/gcc.target/i386/pr80833-2.c       (working copy)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1 -mtune=intel" } */
+/* { dg-require-effective-target int128 } */
+
+__int128 test (__int128 a)
+{
+  asm ("" : "+x" (a));
+  return a;
+}
+
+/* { dg-final { scan-assembler "pinsrq" } } */
+/* { dg-final { scan-assembler "pextrq" } } */

Reply via email to