[rs6000] 64-bit integer loads/stores and FP instructions

Eric Botcazou Wed, 06 Feb 2019 14:17:00 -0800

Hi,

as reported e.g. at https://gcc.gnu.org/ml/gcc-help/2018-11/msg00038.html, the 
7 series of compilers started to use FP instructions for simple 64-bit integer 
loads/stores in unexpected ways.  Consider:


uint64_t var;

void foo1 (uint64_t *p)
{
  var = *p;
}

void foo2 (uint64_t *p)
{
  *p = var;
}

void foo3 (void * p)
{
  var = *(uint64_t *)p;
}

void foo4 (void *p)
{
  *(uint64_t *)p = var;
}

At -O0, the 32-bit compiler uses a double lwz/stw pairs for the 4 functions.  
At -O1, it uses a lfd/stfd pair for foo2 and foo4, but neither for foo1 nor 
foo3.  At -O2, it again uses a double lwz/stw pairs for the 4 functions.

This was introduced by this change:
  https://gcc.gnu.org/ml/gcc-patches/2016-11/msg01993.html

Now, if you try with the GCC 8 compiler, then you get back the double lwz/stw 
pairs for the 4 functions at every optimization level.

The difference between GCC 7 and GCC 8 comes from this change:
  https://gcc.gnu.org/ml/gcc-patches/2018-06/msg00584.html
which seems to imply that the first change was problematic but which was not 
backported to the 7 branch.

So at a minimum I think that the second change should be backported to the 7 
branch; it applies (almost) cleanly and gives no regressions on PowerPC/Linux.

        Backport from mainline
        2018-06-11  Segher Boessenkool  <seg...@kernel.crashing.org>

        PR target/85755
        * config/rs6000/rs6000.md (*movdi_internal32): Put constraint modifiers
        on the correct operand.
        (*movdi_internal64): Ditto.


But I also think that another part of the first change is problematic, namely 
the removal of the '*' modifier on the alternatives, which means that the 
register preference of the instruction isn't skewed towards integer registers 
any more.  That may be OK for native targets, but that is frowned upon for 
embedded targets, where people are really unhappy when the compiler emits 
floating-point instructions for pure integer code for no apparent reason.

So I suggest decoupling the (recent) native targets from the rest for this 
specific issue, see a proof of concept in the second patch.

Thoughts?

-- 
Eric Botcazou

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 8d0b58cc6d6..89812a03d74 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8565,16 +8565,16 @@
 
 (define_insn "*movdi_internal32"
   [(set (match_operand:DI 0 "rs6000_nonimmediate_operand"
-         "=Y,        r,         r,         ^m,        ^d,         ^d,
-          r,         ^wY,       $Z,        ^wb,       $wv,        ^wi,
+         "=Y,        r,         r,         m,         ^d,         ^d,
+          r,         wY,        Z,         ^wb,       $wv,        ^wi,
           *wo,       *wo,       *wv,       *wi,       *wi,        *wv,
           *wv")
 
 	(match_operand:DI 1 "input_operand"
-          "r,        Y,         r,         d,         m,          d,
-           IJKnGHF,  wb,        wv,        wY,        Z,          wi,
-           Oj,       wM,        OjwM,      Oj,        wM,         wS,
-           wB"))]
+         "r,         Y,         r,         ^d,        m,          ^d,
+          IJKnGHF,   ^wb,       $wv,       wY,        Z,          ^wi,
+          Oj,        wM,        OjwM,      Oj,        wM,         wS,
+          wB"))]
 
   "! TARGET_POWERPC64
    && (gpc_reg_operand (operands[0], DImode)
@@ -8642,17 +8642,17 @@
 (define_insn "*movdi_internal64"
   [(set (match_operand:DI 0 "nonimmediate_operand"
                "=YZ,       r,         r,         r,         r,          r,
-                ^m,        ^d,        ^d,        ^wY,       $Z,         $wb,
+                m,         ^d,        ^d,        wY,        Z,          $wb,
                 $wv,       ^wi,       *wo,       *wo,       *wv,        *wi,
                 *wi,       *wv,       *wv,       r,         *h,         *h,
                 ?*r,       ?*wg,      ?*r,       ?*wj")
 
 	(match_operand:DI 1 "input_operand"
-                "r,        YZ,        r,         I,         L,          nF,
-                 d,        m,         d,         wb,        wv,         wY,
-                 Z,        wi,        Oj,        wM,        OjwM,       Oj,
-                 wM,       wS,        wB,        *h,        r,          0,
-                 wg,       r,         wj,        r"))]
+               "r,         YZ,        r,         I,         L,          nF,
+                ^d,        m,         ^d,        ^wb,       $wv,        wY,
+                Z,         ^wi,       Oj,        wM,        OjwM,       Oj,
+                wM,        wS,        wB,        *h,        r,          0,
+                wg,        r,         wj,        r"))]
 
   "TARGET_POWERPC64
    && (gpc_reg_operand (operands[0], DImode)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 7d399a2227b..d48395666fc 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4894,6 +4894,13 @@ rs6000_option_override_internal (bool global_init_p)
 				 || rs6000_tune == PROCESSOR_PPCE5500
 				 || rs6000_tune == PROCESSOR_PPCE6500);
 
+  TARGET_AVOID_FPU_FOR_INT_MOVES = (rs6000_tune != PROCESSOR_POWER4
+				    && rs6000_tune != PROCESSOR_POWER5
+				    && rs6000_tune != PROCESSOR_POWER6
+				    && rs6000_tune != PROCESSOR_POWER7
+				    && rs6000_tune != PROCESSOR_POWER8
+				    && rs6000_tune != PROCESSOR_POWER9);
+
   /* Allow debug switches to override the above settings.  These are set to -1
      in rs6000.opt to indicate the user hasn't directly set the switch.  */
   if (TARGET_ALWAYS_HINT >= 0)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a6929d9641b..e5b8fde8ff0 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8586,7 +8586,8 @@
           Oj,        wM,        OjwM,      Oj,        wM,         wS,
           wB"))]
 
-  "! TARGET_POWERPC64
+  "!TARGET_POWERPC64
+   && !TARGET_AVOID_FPU_FOR_INT_MOVES
    && (gpc_reg_operand (operands[0], DImode)
        || gpc_reg_operand (operands[1], DImode))"
   "@
@@ -8616,6 +8617,25 @@
                 vecsimple")
    (set_attr "size" "64")])
 
+;; This is the pre-GCC7 variant for integer-oriented CPUs without vector units.
+(define_insn "*movdi_internal32_basic"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,?m,?*d,?*d,r")
+	(match_operand:DI 1 "input_operand" "r,Y,r,d,m,d,IJKnGHF"))]
+  "!TARGET_POWERPC64
+   && TARGET_AVOID_FPU_FOR_INT_MOVES
+   && (gpc_reg_operand (operands[0], DImode)
+       || gpc_reg_operand (operands[1], DImode))"
+  "@
+   #
+   #
+   #
+   stfd%U0%X0 %1,%0
+   lfd%U1%X1 %0,%1
+   fmr %0,%1
+   #"
+  [(set_attr "type" "store,load,*,fpstore,fpload,fpsimple,*")
+   (set_attr "size" "64")])
+
 (define_split
   [(set (match_operand:DI 0 "gpc_reg_operand")
 	(match_operand:DI 1 "const_int_operand"))]
@@ -8664,6 +8684,7 @@
                 wg,        r,         wj,        r"))]
 
   "TARGET_POWERPC64
+   && !TARGET_AVOID_FPU_FOR_INT_MOVES
    && (gpc_reg_operand (operands[0], DImode)
        || gpc_reg_operand (operands[1], DImode))"
   "@
@@ -8710,6 +8731,33 @@
                 8,         4,         4,         4,         4,          4,
                 4,         4,         4,         4")])
 
+;; This is the pre-GCC7 variant for integer-oriented CPUs without vector units.
+(define_insn "*movdi_internal64_basic"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,r,?*wg")
+	(match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,*wg,r"))]
+  "TARGET_POWERPC64
+   && TARGET_AVOID_FPU_FOR_INT_MOVES
+   && (gpc_reg_operand (operands[0], DImode)
+       || gpc_reg_operand (operands[1], DImode))"
+  "@
+   std%U0%X0 %1,%0
+   ld%U1%X1 %0,%1
+   mr %0,%1
+   li %0,%1
+   lis %0,%v1
+   #
+   stfd%U0%X0 %1,%0
+   lfd%U1%X1 %0,%1
+   fmr %0,%1
+   mf%1 %0
+   mt%0 %1
+   nop
+   mftgpr %0,%1
+   mffgpr %0,%1"
+  [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,fpsimple,mfjmpr,mtjmpr,*,mftgpr,mffgpr")
+   (set_attr "size" "64")
+   (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4")])
+
 ; Some DImode loads are best done as a load of -1 followed by a mask
 ; instruction.
 (define_split
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index ace8a477550..381a0c94788 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -106,12 +106,13 @@ unsigned int rs6000_debug
 
 ;; Whether to enable the -mfloat128 stuff without necessarily enabling the
 ;; __float128 keyword.
-TargetSave
-unsigned char x_TARGET_FLOAT128_TYPE
-
-Variable
+TargetVariable
 unsigned char TARGET_FLOAT128_TYPE
 
+;; Whether to avoid using the FPU or other units for integer moves if possible
+TargetVariable
+unsigned char TARGET_AVOID_FPU_FOR_INT_MOVES
+
 ;; This option existed in the past, but now is always on.
 mpowerpc
 Target RejectNegative Undocumented Ignore

[rs6000] 64-bit integer loads/stores and FP instructions

Reply via email to